1 Introduction

This webbook contains all the code used for data analysis in study of the individual-level metagenomic data of Podarcis muralis and Podarcis liolepis lizards from different environments during an experimental setup.

1.1 Prepare the R environment

1.1.1 Environment

To reproduce all the analyses locally, clone this repository in your computer using:

RStudio > New Project > Version Control > Git

And indicating the following git repository:

https://github.com/alberdilab/lizard_fmt_effectiveness.git

Once the R project has been created, follow the instructions and code chunks shown in this webbook.

1.1.2 Libraries

The following R packages are required for the data analysis.

# Base
library(R.utils)
library(knitr)
library(tidyverse)
library(devtools)
library(tinytable)
library(janitor)
library(readxl)

# For tree handling
library(ape)
library(phyloseq)
library(phytools)

# For plotting
library(ggplot2)
library(ggrepel)
library(ggpubr)
library(ggnewscale)
library(gridExtra)
library(ggtreeExtra)
library(ggtree)
library(ggh4x)
library(rstatix)
#library(ggpmisc)

# For statistics
library(spaa)
library(vegan)
library(Rtsne)
library(geiger)
library(hilldiv2)
library(distillR)
library(broom.mixed)
library(corrplot)
library(nlme)
library(pairwiseAdonis)
library(ANCOMBC)
library(lme4)
library(nlme)
library(hilldiv2)
library(emmeans)

2 Prepare data

2.1 Load data

Load the original data files outputted by the bioinformatic pipeline.

2.1.1 Sample metadata

sample_metadata <- read_tsv("data/metadata.tsv")%>%
  mutate(time_point = sub("^\\d+_", "", time_point)) %>% 
  filter(!(time_point == "Wild" & Population == "Hot_dry")) %>% 
  filter(!(time_point == "Transplant1" & type == "Control")) %>% 
  filter(!(time_point == "Transplant2" & type == "Control")) %>% 
  filter(!(time_point == "Post-FMT1" & type == "Control")) %>% 
  filter(!(time_point == "Post-FMT2" & type == "Control")) %>% 
  filter(!(time_point == "Post-FMT1" & type == "Hot_control")) %>% 
  filter(!(time_point == "Post-FMT2" & type == "Hot_control")) %>% 
  filter(individual!="LI1_2nd_6") %>% 
  mutate(time_point=str_remove_all(time_point, "Post-"))

2.1.2 Read counts

read_counts_raw <- read_tsv("data/genome.count.tsv") %>%
    rename(genome=1)

#Transformation of read_counts to combine data from both sequence rounds
merge_and_rename <- function(read_counts_raw) {
  read_counts_raw %>%
    # Gather the columns into long format
    pivot_longer(cols = -genome, names_to = "col") %>%
    # Extract prefix
    mutate(prefix = gsub("^(.*?)_.*", "\\1", col)) %>%
    # Group by prefix and genome, then summarize
    group_by(prefix, genome) %>%
    summarize(value = sum(value)) %>%
    # Spread the data back to wide format
    pivot_wider(names_from = prefix, values_from = value)
}

read_counts <- merge_and_rename(read_counts_raw)

2.1.3 Genome base hits

genome_hits_raw <- read_tsv("data/genome.covered_bases.tsv") %>%
    rename(genome=1)

#Transformation of genome_hits to combine data from both sequence rounds
merge_and_rename <- function(genome_hits_raw) {
  genome_hits_raw %>%
    # Gather the columns into long format
    pivot_longer(cols = -genome, names_to = "col") %>%
    # Extract prefix
    mutate(prefix = gsub("^(.*?)_.*", "\\1", col)) %>%
    # Group by prefix and genome, then summarize
    group_by(prefix, genome) %>%
    summarize(value = sum(value)) %>%
    # Spread the data back to wide format
    pivot_wider(names_from = prefix, values_from = value)
}

genome_hits <- merge_and_rename(genome_hits_raw)

2.1.4 Genome taxonomy

genome_taxonomy <- read_tsv("data/gtdbtk.summary.tsv") %>%
  select(mag_id = user_genome, classification) %>%
  separate(
    classification,
    into = c("domain", "phylum", "class", "order", "family", "genus", "species"),
    sep = ";") %>%
    rename(genome=1)

2.1.5 Genome quality

genome_quality <- read_tsv("data/quality_report.tsv") %>%
  select(
    genome = Name, 
    completeness = Completeness, 
    contamination = Contamination,
    length = Genome_Size, 
    gc = GC_Content
  )
genome_quality<-genome_quality %>% 
  mutate (genome=str_remove_all(genome,".fa"))

#Filter MAGs with over 70% completeness and less than 10% contamination
genome_quality <- genome_quality %>%
  filter(completeness > 70 & contamination < 10)

2.1.6 Genome tree

genome_tree <- read_tree("data/gtdbtk.backbone.bac120.classify.tree")
genome_tree$tip.label <- str_replace_all(genome_tree$tip.label,"'", "") #remove single quotes in MAG names

#Filter genome_taxonomy to keep MAGs with over 70% completeness and less than 10% contamination
genome_taxonomy <- genome_taxonomy %>%
  semi_join(genome_quality, by = "genome")
genome_tree <- keep.tip(genome_tree, tip=genome_taxonomy$genome) # keep only MAG tips

2.1.7 Genome annotations

genome_annotations <- read_tsv("data/annotations.tsv.xz") %>%
    rename(gene=1, genome=2, contig=3)

genome_gifts <- distill(genome_annotations,GIFT_db,genomecol=2,annotcol=c(9,10,19), verbosity=F)


#Filter only the MAGs with over 70% completeness and less than 10% contamination
genome_annotations <- genome_annotations %>%
  semi_join(genome_quality, by = "genome")

2.2 Create working objects

Transform the original data files into working objects for downstream analyses.

2.2.1 Merge genome taxonomy and quality

genome_metadata <- genome_taxonomy %>%
  inner_join(genome_quality,by=join_by(genome==genome)) #join quality

2.2.2 Calculate genome coverage

#Filter genome_hits for the MAGs with over 70% completeness and less than 10% contamination
genome_hits <- genome_hits %>%
  semi_join(genome_quality, by = "genome")

genome_coverage <- genome_hits %>%
  mutate(across(where(is.numeric), ~ ./genome_metadata$length))

2.3 Filtering

Remove low sequencing depth samples

#Counts_raw
columns_to_exclude <- c("AD16","AD23", "AD25", "AD91") # Columns to exclude
read_counts <- read_counts %>%
  select(-columns_to_exclude)

#Metadata
sample_metadata <- sample_metadata %>%
  filter(Tube_code != "AD16") %>%
  filter(Tube_code != "AD23") %>%
  filter(Tube_code != "AD25") %>%
  filter(Tube_code != "AD91")

#Coverage_table
columns_to_exclude <- c("AD16","AD23", "AD25", "AD91")  # Columns to exclude
genome_coverage <- genome_coverage %>%
  select(-columns_to_exclude)

2.3.1 Filter reads by coverage

#Filter read_counts for the MAGs with over 70% completeness and less than 10% contamination
read_counts<- read_counts %>%
  semi_join(genome_quality, by = "genome")

min_coverage=0.3
read_counts_filt <- genome_coverage %>%
  mutate(across(where(is.numeric), ~ ifelse(. > min_coverage, 1, 0))) %>%
  mutate(across(-1, ~ . * read_counts[[cur_column()]])) 

2.3.2 Transform reads into genome counts

readlength=150
genome_counts <- read_counts %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))
readlength=150
genome_counts_filt <- read_counts_filt %>%
  mutate(across(where(is.numeric), ~ . / (genome_metadata$length / readlength) ))

2.4 Load data statistics

2.4.1 Raw reads

raw_reads <-
  "data/multiqc_general_stats_2.txt" %>%
  read_tsv() %>%
  select(
    sample = Sample,
    raw_reads = `total_sequences`
  ) %>%
  mutate(sample_prefix = str_extract(sample, "^[^_]+")) %>%
  group_by(sample_prefix) %>%
  summarize(raw_reads = sum(raw_reads, na.rm = TRUE)) %>%
  rename(sample = sample_prefix)  %>%
  mutate(sample = str_remove(sample, "^fastp \\|\\s*")) %>%
  filter(sample!="AD16") %>%
  filter(sample!="AFV13") %>%
  filter(sample!="AD23") %>%
  filter(sample!= "AD25") %>%
  filter(sample!= "AD91")

2.4.2 Quality-filtered reads

fastp_reads <-
  "data/multiqc_general_stats.txt" %>%
  read_tsv() %>%
  select(
    sample = Sample,
    trimmed_reads = `total_sequences`
  ) %>%
  mutate(sample_prefix = str_extract(sample, "^[^_]+")) %>%
  group_by(sample_prefix) %>%
  summarize(trimmed_reads = sum(trimmed_reads, na.rm = TRUE)) %>%
  rename(sample = sample_prefix) %>% 
  filter(!str_detect(sample, "nonlizard \\|")) %>%
  filter(!str_detect(sample, "lizard \\|")) %>%
  filter(!str_detect(sample, "refseq500 \\|"))  %>%
  mutate(sample = str_remove(sample, "^fastp \\|\\s*")) %>%
  filter(sample!="AD16") %>%
  filter(sample!="AFV13") %>%
  filter(sample!="AD23") %>%
  filter(sample!= "AD25") %>%
  filter(sample!= "AD91")

2.4.3 Host-mapped reads

host_mapped <-
  "data/multiqc_general_stats.txt" %>%
  read_tsv() %>%
  filter(str_detect(Sample, "lizard")) %>%
  select(
    sample = Sample,
    host_mapped = `reads_mapped`,
    mapping_total = `raw_total_sequences`
  ) %>%
  mutate(
    host_unmapped = mapping_total - host_mapped
  ) %>%
  filter(!is.na(host_mapped)) %>%
  separate(
    col = sample,
    into = c("host_name", "sample"),
    sep = " \\| "
  ) %>%
  rename(mapped = host_mapped, unmapped = host_unmapped) %>%
  select(-mapping_total) %>%
  pivot_longer(-host_name:-sample) %>%
  mutate(
    name = str_glue("{name}_{host_name}")
  ) %>%
  select(-host_name) %>%
  pivot_wider() %>%
  mutate(sample = str_extract(sample, "^[^_]+")) %>%
  group_by(sample) %>%
  summarize(
    mapped_lizard = sum(mapped_lizard),
    unmapped_lizard = sum(unmapped_lizard)) %>%
  filter(sample!="AD16") %>%
  filter(sample!="AFV13") %>%
  filter(sample!="AD23") %>%
  filter(sample!= "AD25") %>%
  filter(sample!= "AD91")

2.4.4 Prokaryotic fraction

singlem <-
  "data/singleM.tsv" %>%
  read_tsv() %>%
  distinct() %>%
  mutate(
    sample = sample %>% str_remove_all("_1$"),
   read_fraction = read_fraction %>% str_remove("%") %>% as.numeric(),
   read_fraction = read_fraction / 100
  ) %>%
  select(
   sample,
   singlem_prokaryotic_bases = bacterial_archaeal_bases,
   singlem_metagenome_size = metagenome_size,
   singlem_read_fraction = read_fraction,
  ) %>%
mutate(sample_prefix = str_extract(sample, "^[^_]+")) %>%
  group_by(sample_prefix) %>%
  summarize(
    singlem_prokaryotic_bases = sum(singlem_prokaryotic_bases),
    singlem_metagenome_size = sum(singlem_metagenome_size),
    singlem_read_fraction = mean(singlem_read_fraction)
  ) %>%
  rename(sample = sample_prefix)%>%
   filter(sample!="AD16") %>%
  filter(sample!="AFV13") %>%
  filter(sample!="AD23") %>%
  filter(sample!= "AD25") %>%
  filter(sample!= "AD91")

2.4.5 MAG-mapped reads

mag_mapping <-
  read_tsv("data/contig.count.tsv.xz") %>%
  pivot_longer(-sequence_id) %>%
  summarise(value = sum(value), .by = "name") %>%
  rename(sample = name, mapped_mags = value) %>%
  mutate(sample_prefix = str_extract(sample, "^[^_]+")) %>%
    group_by(sample_prefix) %>%
    summarize(
      mapped_mags = sum(mapped_mags)
    ) %>%
    rename(sample = sample_prefix)%>%
  filter(sample!="AD16") %>%
  filter(sample!="AFV13") %>%
  filter(sample!="AD23") %>%
  filter(sample!= "AD25") %>%
  filter(sample!= "AD91")

2.4.6 Wrap data statistics

data_stats <- raw_reads %>%
  left_join(fastp_reads) %>%
  left_join(host_mapped) %>%
  left_join(singlem) %>%
  left_join(mag_mapping)

data_stats<- data_stats %>%
  filter(!str_detect(sample, "nonlizard \\|")) %>%
  filter(!str_detect(sample, "lizard \\|")) %>%
  filter(!str_detect(sample, "refseq500 \\|"))

2.5 Prepare color scheme

AlberdiLab projects use unified color schemes developed for the Earth Hologenome Initiative, to facilitate figure interpretation.

phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    select(phylum, colors) %>% 
    unique() %>%
    arrange(phylum) %>%
    pull(colors, name=phylum)

2.6 Wrap working objects

All working objects are wrapped into a single Rdata object to facilitate downstream usage.

save(sample_metadata, 
     genome_metadata, 
     read_counts, 
     genome_counts, 
     genome_counts_filt, 
     genome_tree,
     phylum_colors,
     data_stats,
     file = "data/03032025data.Rdata")
save(genome_gifts, 
     file = "data/04032025gifts.Rdata")

3 Data statistics

load("data/03032025data.Rdata")

3.1 Sequencing reads statistics

data_stats_filter<-data_stats %>%
  left_join(., sample_metadata[c(1, 4,7,10)], by = join_by(sample == Tube_code))  %>%
  filter(Population!="NA")
data_stats_filter$raw_reads %>% sum()
[1] 3044539460
data_stats_filter$raw_reads %>% mean()
[1] 30445395
data_stats_filter$raw_reads %>% sd()
[1] 13549337

3.2 DNA fractions

#Overall
data_stats_filter %>%
    mutate(mapped_perc=mapped_mags/trimmed_reads) %>%
    summarise(mean=mean(mapped_perc),sd=sd(mapped_perc))
# A tibble: 1 × 2
   mean    sd
  <dbl> <dbl>
1 0.400 0.218
data_stats_filter %>%
  mutate(
    low_quality = raw_reads - trimmed_reads,
    unmapped_reads = trimmed_reads - mapped_lizard - mapped_mags
  ) %>%
  select(sample, low_quality, mapped_lizard, mapped_mags, unmapped_reads) %>%
  pivot_longer(-sample) %>%
  mutate(name=factor(name,levels=c("low_quality","mapped_lizard","unmapped_reads","mapped_mags"))) %>%
  left_join(., sample_metadata, by = join_by(sample == Tube_code)) %>%
  filter(!is.na(time_point)) %>%
  ggplot(aes(x = sample, y = value, fill = name)) +
      geom_bar(stat = "identity", position = "fill") +
      scale_fill_manual(name="Sequence type",
                    breaks=c("low_quality","mapped_lizard","unmapped_reads","mapped_mags"),
                    labels=c("Low quality","Mapped to host","Unmapped","Mapped to MAGs"),
                    values=c("#CCCCCC", "#bcdee1", "#d8b8a3","#93655c"))+
      facet_grid(~time_point, scales = "free", labeller = labeller(time_point=c("0_Wild"="Wild","1_Acclimation"="Acclimation", "2_Antibiotics"="Antibiotics",
                                                                                "3_Transplant1"="Transplant1", "4_Transplant2"="Transplant2", "5_Post-FMT1"="Post1",
                                                                                "6_Post-FMT2"="Post2"))) +
      theme_minimal() +
      theme(axis.text.x = element_text(angle = 90, hjust = 1, size=0)) +
      labs(y="DNA sequence fraction",x="Samples")

3.3 Recovered microbial fraction

data_stats_filter %>%
  mutate(
    unmapped_reads = trimmed_reads - mapped_lizard - mapped_mags,
    mag_proportion = mapped_mags / (mapped_mags + unmapped_reads),
    singlem_read_fraction = singlem_read_fraction
  ) %>%
  select(sample, mag_proportion, singlem_read_fraction) %>%
  mutate(
    mag_proportion = if_else(singlem_read_fraction == 0, 0, mag_proportion),
    singlem_read_fraction = if_else(singlem_read_fraction == 0, NA, singlem_read_fraction),
    singlem_read_fraction = if_else(singlem_read_fraction < mag_proportion, NA, singlem_read_fraction),
    singlem_read_fraction = if_else(singlem_read_fraction > 1, 1, singlem_read_fraction)
  ) %>%
  pivot_longer(-sample, names_to = "proportion", values_to = "value") %>%
  mutate(
    proportion = factor(
      proportion,
      levels = c("mag_proportion", "singlem_read_fraction")
    )
  ) %>%
  left_join(., sample_metadata, by = join_by(sample == Tube_code)) %>%
  filter(!is.na(time_point)) %>%
  ggplot(aes(x = sample, y = value, color = proportion)) +
      geom_line(aes(group = sample), color = "#f8a538") +
      geom_point() +
      scale_color_manual(name="Proportion",
                    breaks=c("mag_proportion","singlem_read_fraction"),
                    labels=c("Recovered","Estimated"),
                    values=c("#52e1e8", "#876b53"))+
      theme_minimal() +
      facet_grid(~time_point, scales = "free", space="free", labeller = labeller(time_point=c("0_Wild"="Wild","1_Acclimation"="Acclimation", "2_Antibiotics"="Antibiotics",
                                                                                "3_Transplant1"="Transplant1", "4_Transplant2"="Transplant2", "5_Post-FMT1"="Post1",
                                                                                "6_Post-FMT2"="Post2")))+
      labs(y = "Samples", x = "Prokaryotic fraction") +
      scale_y_continuous(limits = c(0, 1)) +
      theme(
        axis.text.y = element_text(size = 4),
        axis.text.x = element_text( angle = 90, vjust = 0.5, hjust = 1, size = 0),
        legend.position = "right"
      )

3.3.1 Domain-adjusted mapping rate (DAMR)

data_stats_filter %>%
  mutate(
    unmapped_reads = trimmed_reads - mapped_lizard - mapped_mags,
    mag_proportion = mapped_mags / (mapped_mags + unmapped_reads),
    singlem_read_fraction = singlem_read_fraction
  ) %>%
  mutate(damr=pmin(1, mag_proportion/singlem_read_fraction)) %>%
  filter(!is.na(time_point)) %>%
  select(sample,damr, time_point, Population, type) %>%
  #group_by(type) %>%
  #summarise(mean=mean(damr),sd=sd(damr)) %>%
  tt()
sample damr time_point Population type
AC79 1.0000000 Acclimation Cold_wet Control
AC80 1.0000000 Acclimation Cold_wet Treatment
AC81 0.9813013 Acclimation Cold_wet Treatment
AC82 0.7490771 Acclimation Cold_wet Treatment
AC83 1.0000000 Acclimation Cold_wet Control
AC84 1.0000000 Acclimation Cold_wet Control
AC85 0.8363777 Acclimation Cold_wet Control
AC86 0.8800275 Acclimation Cold_wet Control
AC87 0.9672854 Acclimation Cold_wet Treatment
AC88 0.3142108 Acclimation Cold_wet Control
AC89 0.9338962 Acclimation Cold_wet Treatment
AC90 1.0000000 Acclimation Cold_wet Control
AC91 1.0000000 Acclimation Cold_wet Control
AC92 1.0000000 Acclimation Cold_wet Control
AC93 1.0000000 Acclimation Cold_wet Treatment
AC94 0.9159348 Acclimation Cold_wet Treatment
AC95 0.9239904 Acclimation Cold_wet Treatment
AC96 0.9425962 Acclimation Cold_wet Treatment
AC97 0.9394886 Acclimation Hot_dry Hot_control
AC98 1.0000000 Acclimation Hot_dry Hot_control
AC99 1.0000000 Acclimation Hot_dry Hot_control
AD01 1.0000000 Acclimation Hot_dry Hot_control
AD02 1.0000000 Acclimation Hot_dry Hot_control
AD03 1.0000000 Acclimation Hot_dry Hot_control
AD04 0.8896057 Acclimation Hot_dry Hot_control
AD05 1.0000000 Acclimation Hot_dry Hot_control
AD07 0.9515379 Acclimation Hot_dry Hot_control
AD08 0.7045003 Antibiotics Cold_wet Control
AD09 1.0000000 Antibiotics Cold_wet Control
AD10 0.9598720 Antibiotics Cold_wet Control
AD11 0.6230176 Antibiotics Cold_wet Control
AD12 0.7487463 Antibiotics Cold_wet Control
AD13 1.0000000 Antibiotics Cold_wet Control
AD14 0.4536529 Antibiotics Cold_wet Control
AD15 1.0000000 Antibiotics Cold_wet Control
AD17 0.8774871 Antibiotics Cold_wet Treatment
AD18 0.4991585 Antibiotics Cold_wet Treatment
AD19 0.4060523 Antibiotics Cold_wet Treatment
AD20 0.5227482 Antibiotics Cold_wet Treatment
AD21 0.6551882 Antibiotics Cold_wet Treatment
AD22 0.9106725 Antibiotics Cold_wet Treatment
AD24 0.5972915 Antibiotics Cold_wet Treatment
AD26 0.9204825 Antibiotics Hot_dry Hot_control
AD27 0.5215059 Antibiotics Hot_dry Hot_control
AD28 0.8180607 Antibiotics Hot_dry Hot_control
AD29 0.7175978 Antibiotics Hot_dry Hot_control
AD30 0.9937574 Antibiotics Hot_dry Hot_control
AD32 0.6710817 Antibiotics Hot_dry Hot_control
AD33 0.6778368 Antibiotics Hot_dry Hot_control
AD34 0.4642326 Antibiotics Hot_dry Hot_control
AD46 0.8708468 Transplant1 Cold_wet Treatment
AD47 0.8328653 Transplant1 Cold_wet Treatment
AD49 0.8940294 Transplant1 Cold_wet Treatment
AD50 0.8757711 Transplant1 Cold_wet Treatment
AD51 0.8479193 Transplant1 Cold_wet Treatment
AD52 0.8596870 Transplant1 Cold_wet Treatment
AD53 0.8528974 Transplant1 Cold_wet Treatment
AD54 1.0000000 Transplant1 Hot_dry Hot_control
AD65 1.0000000 Transplant2 Cold_wet Treatment
AD66 0.8821027 Transplant2 Cold_wet Treatment
AD68 0.8820274 Transplant2 Cold_wet Treatment
AD70 0.9590793 Transplant2 Cold_wet Treatment
AD71 0.8360540 Transplant2 Cold_wet Treatment
AD72 0.8835253 Transplant2 Cold_wet Treatment
AD73 1.0000000 Transplant2 Hot_dry Hot_control
AD75 1.0000000 FMT1 Cold_wet Treatment
AD76 0.9862581 FMT1 Cold_wet Treatment
AD77 0.8963376 FMT1 Cold_wet Treatment
AD78 1.0000000 FMT1 Cold_wet Treatment
AD80 0.8808155 FMT1 Cold_wet Treatment
AD83 1.0000000 FMT1 Cold_wet Treatment
AD84 1.0000000 FMT1 Cold_wet Treatment
AD88 1.0000000 FMT1 Cold_wet Treatment
AE04 1.0000000 FMT2 Cold_wet Treatment
AE05 1.0000000 FMT2 Cold_wet Treatment
AE91 0.9259146 FMT2 Cold_wet Treatment
AE94 0.9152960 FMT2 Cold_wet Treatment
AE95 1.0000000 FMT2 Cold_wet Treatment
AE96 0.7786112 FMT2 Cold_wet Treatment
AE99 0.8513354 FMT2 Cold_wet Treatment
AF01 0.9046369 FMT2 Cold_wet Treatment
AF03 0.9030087 FMT2 Cold_wet Treatment
AFU87 0.8810513 Wild Cold_wet Control
AFU88 0.8317800 Wild Cold_wet Treatment
AFU91 0.8923699 Wild Cold_wet Treatment
AFU92 0.8094776 Wild Cold_wet Treatment
AFU93 0.8517368 Wild Cold_wet Control
AFU94 0.8325385 Wild Cold_wet Treatment
AFU95 0.8419270 Wild Cold_wet Treatment
AFU96 0.8326820 Wild Cold_wet Control
AFU97 0.8107271 Wild Cold_wet Treatment
AFU98 0.7506522 Wild Cold_wet Control
AFU99 0.8582371 Wild Cold_wet Treatment
AFV01 0.9331539 Wild Cold_wet Treatment
AFV02 0.8316460 Wild Cold_wet Treatment
AFV03 0.8752591 Wild Cold_wet Control
AFV04 0.9180285 Wild Cold_wet Control
AFV05 1.0000000 Wild Cold_wet Control
AFV06 1.0000000 Wild Cold_wet Control
AFV07 0.8460805 Wild Cold_wet Control

4 Diversity analysis

load("data/03032025data.Rdata")
load("data/04032025gifts.Rdata")
load("data/beta_03032025.Rdata")
genome_counts_filt <- genome_counts_filt %>% 
  column_to_rownames(var = "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0)%>% 
  select(where(~ sum(.) > 0)) %>% 
  select(-AC85) %>% 
  rownames_to_column(., "genome")

genome_tree <- keep.tip(genome_tree, tip=genome_counts_filt$genome)

table <- genome_counts_filt %>%
  t() %>%
  row_to_names(row_number = 1) %>% 
  as.data.frame() %>% 
  mutate_if(is.character,as.numeric) %>% 
  rownames_to_column(., "sample")
  
master_table <- sample_metadata %>%
  mutate(sample=Tube_code) %>%
  mutate(Tube_code=str_remove_all(Tube_code, "_a")) %>% 
  filter(Tube_code %in% table$sample) %>%
  mutate(treatment = sub("^\\d+_", "", time_point))%>% 
  left_join(., table, by=join_by("Tube_code" =="sample"))

sample_metadata <- master_table %>% 
  select(1:13)

genome_counts_filt <- master_table %>% 
  select(12,14:323) %>% 
  t() %>%
  row_to_names(row_number = 1) %>% 
  as.data.frame() %>% 
  mutate_if(is.character,as.numeric) %>% 
  filter(rowSums(. != 0, na.rm = TRUE) > 0)%>%
  dplyr::select(where(~ !all(. == 0)))%>% 
  rownames_to_column(., "genome")

genome_counts_filtering <- master_table %>% 
  select(12,14:323) %>% 
  t() %>%
  row_to_names(row_number = 1) %>% 
  as.data.frame() %>% 
  mutate_if(is.character,as.numeric) %>% 
  filter(rowSums(. != 0, na.rm = TRUE) > 0)%>%
  dplyr::select(where(~ !all(. == 0)))

genome_tree <- keep.tip(genome_tree, tip=genome_counts_filt$genome)
#match_data(genome_counts_filtering,genome_tree)

genome_metadata <- genome_metadata %>% 
  filter(genome %in% genome_counts_filt$genome)

4.1 Calculate diversities

4.1.1 Alpha diversity

# Calculate Hill numbers
richness <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

phylogenetic <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, tree = genome_tree) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(phylogenetic = 1) %>%
  rownames_to_column(var = "sample")

genome_gifts1 <- genome_gifts[genome_counts_filt$genome,]
genome_gifts1 <- genome_gifts1[, colSums(genome_gifts1 != 0) > 0]

genome_counts_filt1 <- genome_counts_filt[genome_counts_filt$genome %in% rownames(genome_gifts1),]
genome_counts_filt1 <- genome_counts_filt1 %>% 
  remove_rownames() %>% 
  column_to_rownames(var = "genome") %>% 
      filter(rowSums(. != 0, na.rm = TRUE) > 0) %>% 
  rownames_to_column(., "genome")

# Merge all metrics
alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample == sample)) %>%
  full_join(phylogenetic, by = join_by(sample == sample))
treatment_colors <- c('#008080', "#d57d2c")

4.1.2 Beta diversity

beta_q0n <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0)) %>%
  hillpair(., q = 0)

beta_q1n <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0)) %>%
  hillpair(., q = 1)

beta_q1p <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0)) %>%
  hillpair(., q = 1, tree = genome_tree)

genome_gifts1 <- genome_gifts[genome_counts_filt$genome,]
genome_gifts1 <- genome_gifts1[, colSums(genome_gifts1 != 0) > 0]
genome_counts_filt1 <- genome_counts_filt[genome_counts_filt$genome %in% rownames(genome_gifts1),]
save(beta_q0n, 
     beta_q1n, 
     beta_q1p, 
     file = "data/beta_03032025.Rdata")

4.2 Does captivity change the microbial community?

alpha_div_meta <- alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == sample))%>%
  filter(Population == "Cold_wet" & time_point %in% c("Acclimation", "Wild") ) 

4.2.1 Alpha diversity

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = "sample") %>%
  mutate(metric=factor(metric,levels=c("richness", "neutral", "phylogenetic"))) %>%
  filter(Population == "Cold_wet" & time_point %in% c("Acclimation", "Wild") ) %>% 
  ggplot(aes(y = value, x = time_point, color=time_point, fill=time_point)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5, outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=treatment_colors)+
  scale_fill_manual(values=treatment_colors) +
  facet_wrap(. ~ metric, scales = "free") +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size=10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Increase plot size
    plot.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8)
      ) +
  ylab("Alpha diversity")

Richness

Modelq0GLMMNB <- glmer.nb(richness ~ time_point+(1|individual), data = alpha_div_meta)
summary(Modelq0GLMMNB)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(7.7073)  ( log )
Formula: richness ~ time_point + (1 | individual)
   Data: alpha_div_meta

     AIC      BIC   logLik deviance df.resid 
   341.1    347.3   -166.5    333.1       31 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.88922 -0.29613  0.04659  0.45757  1.21930 

Random effects:
 Groups     Name        Variance Std.Dev.
 individual (Intercept) 0.1706   0.413   
Number of obs: 35, groups:  individual, 18

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)      3.7578     0.1394  26.952  < 2e-16 ***
time_pointWild   0.4405     0.1344   3.278  0.00104 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
time_pntWld -0.528
emmeans::emmeans(Modelq0GLMMNB, pairwise ~ time_point)
$emmeans
 time_point  emmean    SE  df asymp.LCL asymp.UCL
 Acclimation   3.76 0.139 Inf      3.48      4.03
 Wild          4.20 0.133 Inf      3.94      4.46

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast           estimate    SE  df z.ratio p.value
 Acclimation - Wild   -0.441 0.134 Inf  -3.278  0.0010

Results are given on the log (not the response) scale. 

Neutral

Model_neutral <- lme(fixed = neutral ~ time_point, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_neutral)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  258.7499 264.7359 -125.3749

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:    8.169821 7.178626

Fixed effects:  neutral ~ time_point 
                  Value Std.Error DF  t-value p-value
(Intercept)    19.83867  2.614283 17 7.588570       0
time_pointWild 16.81743  2.447303 16 6.871819       0
 Correlation: 
               (Intr)
time_pointWild -0.489

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-1.52249490 -0.58253918 -0.03654144  0.58990396  1.40500371 

Number of Observations: 35
Number of Groups: 18 

Phylogenetic

Model_phylo <- lme(fixed = phylogenetic ~ time_point, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_phylo)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC   logLik
  129.9486 135.9346 -60.9743

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:   0.6826651 1.252366

Fixed effects:  phylogenetic ~ time_point 
                   Value Std.Error DF   t-value p-value
(Intercept)     5.368277 0.3454342 17 15.540667  0.0000
time_pointWild -0.135768 0.4249336 16 -0.319504  0.7535
 Correlation: 
               (Intr)
time_pointWild -0.637

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-2.18050022 -0.47485140  0.08030429  0.37904332  1.82607719 

Number of Observations: 35
Number of Groups: 18 

4.2.2 Beta diversity

samples_to_keep <- sample_metadata %>%
  filter(Population == "Cold_wet" & time_point %in% c("Acclimation", "Wild")) %>% 
  select(Tube_code) %>% 
  pull()
subset_meta <- sample_metadata %>%
  filter(Population == "Cold_wet" & time_point %in% c("Acclimation", "Wild"))

Richness

richness <- as.matrix(beta_q0n$S)
richness <- as.dist(richness[rownames(richness) %in% samples_to_keep,
               colnames(richness) %in% samples_to_keep])

betadisper(richness, subset_meta$time_point) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)  
Groups     1 0.07277 0.072771 7.2819    999  0.012 *
Residuals 33 0.32979 0.009993                       
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(richness ~ time_point,
        data = subset_meta %>% arrange(match(Tube_code,labels(richness))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(richness))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.5295984 0.05492497 1.917862 0.001
Residual 33 9.1126169 0.94507503 NA NA
Total 34 9.6422153 1.00000000 NA NA

Neutral

neutral <- as.matrix(beta_q1n$S)
neutral <- as.dist(neutral[rownames(neutral) %in% samples_to_keep,
               colnames(neutral) %in% samples_to_keep])

betadisper(neutral, subset_meta$time_point) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     1 0.02764 0.027637 2.2078    999  0.128
Residuals 33 0.41309 0.012518                     
adonis2(neutral ~ time_point,
        data = subset_meta %>% arrange(match(Tube_code,labels(neutral))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(neutral))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.7470135 0.08504426 3.067318 0.001
Residual 33 8.0368067 0.91495574 NA NA
Total 34 8.7838201 1.00000000 NA NA

Phylogenetic

phylo <- as.matrix(beta_q1p$S)
phylo <- as.dist(phylo[rownames(phylo) %in% samples_to_keep,
               colnames(phylo) %in% samples_to_keep])
betadisper(phylo, subset_meta$time_point) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq     F N.Perm Pr(>F)  
Groups     1 0.04119 0.041187 2.897    999  0.089 .
Residuals 33 0.46917 0.014217                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(phylo ~ time_point,
        data = subset_meta %>% arrange(match(Tube_code,labels(phylo))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(phylo))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.2075719 0.1133094 4.217042 0.001
Residual 33 1.6243310 0.8866906 NA NA
Total 34 1.8319029 1.0000000 NA NA

4.2.3 Structural zeros

wild_samples <- sample_metadata %>%
  filter(Population == "Cold_wet" & time_point == "Wild") %>% 
  dplyr::select(sample) %>%
  pull()

acclimation_samples <- sample_metadata %>%
  filter(Population == "Cold_wet" & time_point == "Acclimation") %>% 
  dplyr::select(sample) %>% pull()

structural_zeros <- genome_counts_filt %>% 
  select(one_of(c("genome",subset_meta$sample))) %>%
  filter(rowSums(across(where(is.numeric), ~ . != 0)) > 0) %>%
  select(genome, where(~ is.numeric(.) && sum(.) > 0)) %>% 
   rowwise() %>% #compute for each row (genome)
   mutate(all_zeros_wild = all(c_across(all_of(wild_samples)) == 0)) %>% 
   mutate(all_zeros_acclimation= all(c_across(all_of(acclimation_samples)) == 0)) %>% 
   mutate(average_wild = mean(c_across(all_of(wild_samples)), na.rm = TRUE)) %>% 
   mutate(average_acclimation = mean(c_across(all_of(acclimation_samples)), na.rm = TRUE)) %>% 
   filter(all_zeros_wild == TRUE || all_zeros_acclimation==TRUE)  %>% 
   mutate(present = case_when(
      all_zeros_wild & !all_zeros_acclimation ~ "acclimation",
      !all_zeros_wild & all_zeros_acclimation ~ "wild",
      !all_zeros_wild & !all_zeros_acclimation ~ "None",
      TRUE ~ NA_character_
    )) %>%
   mutate(average = ifelse(present == "acclimation", average_wild, average_acclimation)) %>%
   dplyr::select(genome, present, average) %>%
   left_join(genome_metadata, by=join_by(genome==genome)) %>%
   arrange(present,-average)

Wild

structural_zeros %>% 
  filter(present=="wild") %>% 
  count(phylum, name = "sample_count") %>%
  arrange(desc(sample_count)) 
# A tibble: 5 × 2
# Rowwise: 
  phylum           sample_count
  <chr>                   <int>
1 p__Bacillota_A              7
2 p__Bacillota                1
3 p__Bacillota_B              1
4 p__Bacteroidota             1
5 p__Spirochaetota            1

Acclimation

structural_zeros %>% 
  filter(present=="acclimation") %>% 
  select(1,5:10)
# A tibble: 14 × 7
# Rowwise: 
   genome                phylum            class                  order               family                genus              species                       
   <chr>                 <chr>             <chr>                  <chr>               <chr>                 <chr>              <chr>                         
 1 AH1_2nd_12:bin_000001 p__Pseudomonadota c__Gammaproteobacteria o__Pseudomonadales  f__Moraxellaceae      g__Acinetobacter   s__Acinetobacter johnsonii    
 2 AH1_2nd_15:bin_000001 p__Pseudomonadota c__Alphaproteobacteria o__Rhizobiales      f__Rhizobiaceae       g__Agrobacterium   s__Agrobacterium tumefaciens_H
 3 AH1_2nd_17:bin_000010 p__Bacteroidota   c__Bacteroidia         o__Bacteroidales    f__Rikenellaceae      g__Mucinivorans    s__                           
 4 AH1_2nd_17:bin_000038 p__Bacteroidota   c__Bacteroidia         o__Bacteroidales    f__Bacteroidaceae     g__Bacteroides_G   s__                           
 5 AH1_2nd_20:bin_000014 p__Pseudomonadota c__Gammaproteobacteria o__Enterobacterales f__Enterobacteriaceae g__Citrobacter     s__Citrobacter portucalensis  
 6 AH1_2nd_20:bin_000061 p__Bacillota      c__Bacilli             o__Lactobacillales  f__Enterococcaceae    g__Enterococcus    s__                           
 7 AH1_2nd_20:bin_000073 p__Bacillota      c__Bacilli             o__Lactobacillales  f__Enterococcaceae    g__Enterococcus    s__Enterococcus sp002174455   
 8 LI1_2nd_10:bin_000001 p__Bacillota      c__Bacilli             o__Lactobacillales  f__Enterococcaceae    g__Enterococcus_A  s__Enterococcus_A raffinosus  
 9 LI1_2nd_10:bin_000017 p__Chlamydiota    c__Chlamydiia          o__Chlamydiales     f__Chlamydiaceae      g__                s__                           
10 LI1_2nd_2:bin_000039  p__Bacillota_A    c__Clostridia          o__TANB77           f__CAG-508            g__                s__                           
11 LI1_2nd_7:bin_000045  p__Bacillota_A    c__Clostridia          o__Oscillospirales  f__Acutalibacteraceae g__                s__                           
12 LI1_2nd_7:bin_000074  p__Pseudomonadota c__Gammaproteobacteria o__Enterobacterales f__Enterobacteriaceae g__Escherichia     s__Escherichia coli           
13 LI1_2nd_8:bin_000030  p__Actinomycetota c__Actinomycetia       o__Mycobacteriales  f__Mycobacteriaceae   g__Corynebacterium s__                           
14 LI1_2nd_8:bin_000079  p__Pseudomonadota c__Gammaproteobacteria o__Enterobacterales f__Enterobacteriaceae g__Citrobacter_A   s__Citrobacter_A amalonaticus 

Community elements differences in structural zeros

element_gift <- GIFTs_elements_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  left_join(., sample_metadata[c(12,13)], by=join_by("sample"=="sample"))
uniqueGIFT_db<- unique(GIFT_db[c(2,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)

element_gift %>%
    pivot_longer(-c(sample,treatment), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ treatment)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)
# A tibble: 35 × 3
   trait    p_value p_adjust
   <chr>      <dbl>    <dbl>
 1 B0214 0.000261   0.00340 
 2 B0219 0.00164    0.0111  
 3 B0302 0.000198   0.00284 
 4 B0703 0.00306    0.0182  
 5 B0709 0.000103   0.00284 
 6 B0712 0.000551   0.00561 
 7 B0801 0.000198   0.00284 
 8 B1012 0.00136    0.0102  
 9 B1028 0.00000837 0.000599
10 D0102 0.00131    0.0102  
# ℹ 25 more rows

4.2.4 Differential abundance analysis: composition

phylo_samples <- sample_metadata %>%
  filter(Population == "Cold_wet" & time_point %in% c("Wild", "Acclimation") )%>% 
  column_to_rownames("sample") %>% 
  sample_data()
phylo_genome <- genome_counts_filt %>% 
  filter(!genome %in% structural_zeros$genome) %>% 
  select(one_of(c("genome",rownames(phylo_samples)))) %>%
  filter(rowSums(across(where(is.numeric), ~ . != 0)) > 0) %>%
  select(genome, where(~ is.numeric(.) && sum(.) > 0)) %>%
  column_to_rownames("genome") %>% 
  otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata %>% 
  filter(genome %in% rownames(phylo_genome)) %>% 
  column_to_rownames("genome") %>% 
  dplyr::select(domain,phylum,class,order,family,genus,species) %>% 
  as.matrix() %>% 
  tax_table()

physeq_genome_filtered <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples)
ancom_rand_output = ancombc2(data = physeq_genome_filtered, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "time_point",
                  rand_formula = "(1|individual)",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = NULL, 
                  struc_zero = FALSE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  lme_control = lme4::lmerControl(),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)
save(ancom_rand_output, 
     file = "data/ancombc_wild_accl.Rdata")
load("data/ancombc_wild_accl.Rdata")
taxonomy <- data.frame(physeq_genome_filtered@tax_table) %>%
  rownames_to_column(., "taxon") %>%
  mutate_at(vars(order, phylum, family, genus, species), ~ str_replace(., "[dpcofgs]__", ""))

ancombc_rand_table_mag <- ancom_rand_output$res %>%
  dplyr::select(taxon, lfc_time_point1_Acclimation, p_time_point1_Acclimation) %>%
  filter(p_time_point1_Acclimation < 0.05) %>%
  dplyr::arrange(p_time_point1_Acclimation) %>%
  merge(., taxonomy, by="taxon") %>%
  mutate_at(vars(phylum, species), ~ str_replace(., "[dpcofgs]__", ""))%>%
  dplyr::arrange(lfc_time_point1_Acclimation)

colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", ""))  %>%
  right_join(taxonomy, by=join_by(phylum == phylum)) %>%
  dplyr::select(phylum, colors) %>%
  mutate(colors = str_c(colors, "80"))  %>% #add 80% alpha
    unique() %>%
    dplyr::arrange(phylum)

tax_table <- as.data.frame(unique(ancombc_rand_table_mag$phylum))
  
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum")%>%
    dplyr::arrange(phylum) %>%
    dplyr::select(colors) %>%
    pull()
ancombc_rand_table_mag%>%
      mutate(genome=factor(taxon,levels=ancombc_rand_table_mag$taxon)) %>%
ggplot(., aes(x=lfc_time_point1_Acclimation, y=forcats::fct_reorder(genome,lfc_time_point1_Acclimation), fill=phylum)) + #forcats::fct_rev()
  geom_col() + 
  scale_fill_manual(values=tax_color) + 
  geom_hline(yintercept=0) + 
#  coord_flip()+
  theme(panel.background = element_blank(),
        axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 8),
        axis.title = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 14, face = "bold"),
        legend.position = "right", legend.box = "vertical")+
  xlab("log2FoldChange") + 
  ylab("Species")+
  guides(fill=guide_legend(title="Phylum"))

Phyla of the significant MAGs

ancombc_rand_table_mag%>%
  filter(lfc_time_point1_Acclimation<0)  %>% 
  count(phylum, name = "sample_count") %>%
  arrange(desc(sample_count))  
        phylum sample_count
1  Bacillota_A           16
2 Bacteroidota            9
3  Bacillota_C            1
ancombc_rand_table_mag%>%
  filter(lfc_time_point1_Acclimation<0)  %>% 
  select(phylum, genus, species)
         phylum                genus                      species
1   Bacillota_A           MGBC140009                             
2   Bacillota_A                                                  
3   Bacillota_A                                                  
4   Bacillota_A     Negativibacillus                             
5  Bacteroidota      Parabacteroides                             
6  Bacteroidota          Bacteroides                             
7   Bacillota_A           Hungatella                             
8  Bacteroidota          Bacteroides                             
9  Bacteroidota      Parabacteroides                             
10 Bacteroidota      Parabacteroides                             
11 Bacteroidota          Bacteroides                             
12  Bacillota_A                                                  
13 Bacteroidota          Bacteroides                             
14  Bacillota_A                                                  
15  Bacillota_A           Copromonas                             
16  Bacillota_A        Enterocloster                             
17  Bacillota_A      Velocimicrobium                             
18  Bacillota_A               CAG-95                             
19  Bacillota_C                                                  
20 Bacteroidota          Bacteroides                             
21  Bacillota_A                                                  
22  Bacillota_A           Copromonas                             
23 Bacteroidota          Bacteroides Bacteroides thetaiotaomicron
24  Bacillota_A Pseudoflavonifractor                             
25  Bacillota_A        Enterocloster                             
26  Bacillota_A             JALFVM01                             
ancombc_rand_table_mag%>%
  filter(lfc_time_point1_Acclimation<0)  %>% 
  count(genus, name = "sample_count") %>%
  arrange(desc(sample_count))
                  genus sample_count
1                                  6
2           Bacteroides            6
3       Parabacteroides            3
4            Copromonas            2
5         Enterocloster            2
6                CAG-95            1
7            Hungatella            1
8              JALFVM01            1
9            MGBC140009            1
10     Negativibacillus            1
11 Pseudoflavonifractor            1
12      Velocimicrobium            1

4.2.5 Differences in the functional capacity

GIFTs_elements <- to.elements(genome_gifts,GIFT_db)
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts_filt$genome,]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>% 
  select_if(~ !is.numeric(.) || sum(.) != 0)

GIFTs_functions <- to.functions(GIFTs_elements_filtered,GIFT_db)

GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db)

sample_sub <- sample_metadata %>%
  filter(Population == "Cold_wet" & treatment %in% c("Acclimation", "Wild"))

genome_counts_row <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% 
    select(one_of(c("genome",sample_sub$sample))) %>%
  column_to_rownames(., "genome") 

GIFTs_elements_community <- to.community(GIFTs_elements_filtered,genome_counts_row,GIFT_db)
GIFTs_functions_community <- to.community(GIFTs_functions,genome_counts_row,GIFT_db)
GIFTs_domains_community <- to.community(GIFTs_domains,genome_counts_row,GIFT_db)

4.2.5.1 Community elements differences:

MCI

GIFTs_elements_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(treatment) %>%
  summarise(MCI = mean(value), sd = sd(value))
# A tibble: 2 × 3
  treatment     MCI     sd
  <chr>       <dbl>  <dbl>
1 Acclimation 0.335 0.0324
2 Wild        0.350 0.0237
MCI <- GIFTs_elements_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) 

shapiro.test(MCI$value)

    Shapiro-Wilk normality test

data:  MCI$value
W = 0.94347, p-value = 0.07144
wilcox.test(value ~ treatment, data=MCI)

    Wilcoxon rank sum exact test

data:  value by treatment
W = 114, p-value = 0.2066
alternative hypothesis: true location shift is not equal to 0
element_gift <- GIFTs_elements_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  left_join(., sample_metadata[c(12,13)], by=join_by("sample"=="sample"))
uniqueGIFT_db<- unique(GIFT_db[c(2,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)

significant_elements <- element_gift %>%
    pivot_longer(-c(sample,treatment), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ treatment)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)%>%
  left_join(.,uniqueGIFT_db[c(1,3)],by = join_by(trait == Code_element))

element_gift_t <- element_gift  %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "trait")

element_gift_filt <- subset(element_gift_t, trait %in% significant_elements$trait) %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample")%>% 
  left_join(., sample_metadata[c(12,13)], by = join_by(sample == sample))

difference_table <- element_gift_filt %>%
  select(-sample) %>%
  group_by(treatment) %>%
  summarise(across(everything(), mean)) %>%
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric) %>%
  rownames_to_column(., "Elements") %>%
  left_join(.,uniqueGIFT_db[c(1,3,4)],by = join_by(Elements == Code_element)) %>% 
  arrange(Function) %>% 
  mutate(Difference=Acclimation-Wild)%>% 
  mutate(group_color = ifelse(Difference <0, "Wild","Acclimation")) 
difference_table %>%
  ggplot(aes(x=forcats::fct_reorder(Function,Difference), y=Difference, fill=group_color)) + 
  geom_col() +
#  geom_point(size=4) + 
  scale_fill_manual(values=treatment_colors) +
  geom_hline(yintercept=0) + 
  coord_flip()+
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 12),
        legend.position = "right", 
        panel.background = element_blank(),
          panel.grid.major = element_line(size = 0.15, linetype = 'solid',
                                colour = "grey"))+
  xlab("Microbial Functional Capacity") + 
  ylab("Mean difference")+
  labs(fill="Treatment")

4.2.5.2 Community functions differences

MCI

GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(treatment) %>%
  summarise(MCI = mean(value), sd = sd(value))
# A tibble: 2 × 3
  treatment     MCI     sd
  <chr>       <dbl>  <dbl>
1 Acclimation 0.330 0.0320
2 Wild        0.346 0.0196
MCI <- GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) 

shapiro.test(MCI$value)

    Shapiro-Wilk normality test

data:  MCI$value
W = 0.94507, p-value = 0.07998
wilcox.test(value ~ treatment, data=MCI)

    Wilcoxon rank sum exact test

data:  value by treatment
W = 100, p-value = 0.08313
alternative hypothesis: true location shift is not equal to 0
function_gift <- GIFTs_functions_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  merge(., sample_metadata[c(12,13)], by="sample")
unique_funct_db<- GIFT_db[c(3,4,5)] %>% 
  distinct(Code_function, .keep_all = TRUE)

significant_functional <- function_gift %>%
  pivot_longer(-c(sample,treatment), names_to = "trait", values_to = "value") %>%
  group_by(trait) %>%
  summarise(p_value = wilcox.test(value ~ treatment)$p.value) %>%
  mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
  filter(p_adjust < 0.05)%>%
  left_join(.,unique_funct_db[c(1,3)],by = join_by(trait == Code_function))
Code_function Acclimation Wild Function Difference treatment
B03 0.1097356 0.1256091 Amino acid derivative biosynthesis -0.0158735 Wild
D03 0.2921075 0.3442827 Sugar degradation -0.0521752 Wild

4.3 Does the antibiotic administration change the microbial community?

alpha_div_meta <- alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == sample))%>%
  filter(Population == "Cold_wet" & time_point %in% c("Acclimation", "Antibiotics") ) 

4.3.1 Alpha diversity

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = "sample") %>%
  mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic"))) %>%
  filter(Population == "Cold_wet" & time_point %in% c("Acclimation", "Antibiotics") ) %>% 
  ggplot(aes(y = value, x = time_point, color=time_point, fill=time_point)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5, outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=treatment_colors)+
  scale_fill_manual(values=treatment_colors) +
  facet_wrap(. ~ metric, scales = "free") +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size=10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Increase plot size
    plot.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8)
      ) +
  ylab("Alpha diversity")

Richness

Modelq0GLMMNB <- glmer.nb(richness ~ time_point+(1|individual), data = alpha_div_meta)
summary(Modelq0GLMMNB)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(4.6838)  ( log )
Formula: richness ~ time_point + (1 | individual)
   Data: alpha_div_meta

     AIC      BIC   logLik deviance df.resid 
   278.1    283.9   -135.0    270.1       28 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.49142 -0.49637 -0.06281  0.55004  1.99619 

Random effects:
 Groups     Name        Variance Std.Dev.
 individual (Intercept) 0.357    0.5975  
Number of obs: 32, groups:  individual, 18

Fixed effects:
                      Estimate Std. Error z value Pr(>|z|)    
(Intercept)             3.7303     0.1880  19.844  < 2e-16 ***
time_pointAntibiotics  -1.1593     0.1966  -5.896 3.72e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
tm_pntAntbt -0.427
emmeans::emmeans(Modelq0GLMMNB, pairwise ~ time_point)
$emmeans
 time_point  emmean    SE  df asymp.LCL asymp.UCL
 Acclimation   3.73 0.188 Inf      3.36      4.10
 Antibiotics   2.57 0.206 Inf      2.17      2.97

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast                  estimate    SE  df z.ratio p.value
 Acclimation - Antibiotics     1.16 0.197 Inf   5.896  <.0001

Results are given on the log (not the response) scale. 

Neutral

Model_neutral <- lme(fixed = neutral ~ time_point, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_neutral)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  224.1303 229.7351 -108.0652

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:    3.198228 7.479481

Fixed effects:  neutral ~ time_point 
                          Value Std.Error DF   t-value p-value
(Intercept)            19.11841  1.971629 17  9.696759   0e+00
time_pointAntibiotics -11.46314  2.675428 13 -4.284600   9e-04
 Correlation: 
                      (Intr)
time_pointAntibiotics -0.63 

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-1.86893024 -0.51778639 -0.08383536  0.57135437  1.94021413 

Number of Observations: 32
Number of Groups: 18 

Phylogenetic

Model_phylo <- lme(fixed = phylogenetic ~ time_point, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_phylo)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
      AIC      BIC    logLik
  138.105 143.7097 -65.05248

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:    1.022301 1.674107

Fixed effects:  phylogenetic ~ time_point 
                          Value Std.Error DF   t-value p-value
(Intercept)            5.411618  0.474784 17 11.398064  0.0000
time_pointAntibiotics -0.793905  0.603291 13 -1.315958  0.2109
 Correlation: 
                      (Intr)
time_pointAntibiotics -0.586

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-1.65952954 -0.50058715 -0.03050434  0.42433230  1.61438947 

Number of Observations: 32
Number of Groups: 18 

4.3.2 Beta diversity

samples_to_keep <- sample_metadata %>%
  filter(Population == "Cold_wet" & time_point %in% c("Acclimation", "Antibiotics")) %>% 
  select(Tube_code) %>% 
  pull()
subset_meta <- sample_metadata %>%
  filter(Population == "Cold_wet" & time_point %in% c("Acclimation", "Antibiotics"))

Richness

richness <- as.matrix(beta_q0n$S)
richness <- as.dist(richness[rownames(richness) %in% samples_to_keep,
               colnames(richness) %in% samples_to_keep])

betadisper(richness, subset_meta$time_point) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.013687 0.0136873 2.0494    999  0.142
Residuals 30 0.200356 0.0066785                     
adonis2(richness ~ time_point,
        data = subset_meta %>% arrange(match(Tube_code,labels(richness))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(richness))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 1.580367 0.1313608 4.536779 0.001
Residual 30 10.450370 0.8686392 NA NA
Total 31 12.030738 1.0000000 NA NA

Neutral

neutral <- as.matrix(beta_q1n$S)
neutral <- as.dist(neutral[rownames(neutral) %in% samples_to_keep,
               colnames(neutral) %in% samples_to_keep])

betadisper(neutral, subset_meta$time_point) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)  
Groups     1 0.03491 0.034906 2.5203    999  0.097 .
Residuals 30 0.41549 0.013850                       
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(neutral ~ time_point,
        data = subset_meta %>% arrange(match(Tube_code,labels(neutral))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(neutral))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 1.78013 0.1607184 5.744856 0.001
Residual 30 9.29595 0.8392816 NA NA
Total 31 11.07608 1.0000000 NA NA

Phylogenetic

phylo <- as.matrix(beta_q1p$S)
phylo <- as.dist(phylo[rownames(phylo) %in% samples_to_keep,
               colnames(phylo) %in% samples_to_keep])
betadisper(phylo, subset_meta$time_point) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)    
Groups     1 0.31336 0.313365 16.503    999  0.001 ***
Residuals 30 0.56964 0.018988                         
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(phylo ~ time_point,
        data = subset_meta %>% arrange(match(Tube_code,labels(phylo))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(phylo))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 1.572915 0.2890287 12.19579 0.001
Residual 30 3.869157 0.7109713 NA NA
Total 31 5.442071 1.0000000 NA NA

4.3.3 Structural zeros

acclimation_samples <- sample_metadata %>%
  filter(Population == "Cold_wet" & time_point == "Acclimation") %>% 
  dplyr::select(sample) %>%
  pull()

antibiotics_samples <- sample_metadata %>%
  filter(Population == "Cold_wet" & time_point == "Antibiotics") %>% 
  dplyr::select(sample) %>% pull()

structural_zeros <- genome_counts_filt %>% 
  select(one_of(c("genome",subset_meta$sample))) %>%
  filter(rowSums(across(where(is.numeric), ~ . != 0)) > 0) %>%
  select(genome, where(~ is.numeric(.) && sum(.) > 0)) %>% 
   rowwise() %>% #compute for each row (genome)
   mutate(all_zeros_acclimation = all(c_across(all_of(acclimation_samples)) == 0)) %>% 
   mutate(all_zeros_antibiotics = all(c_across(all_of(antibiotics_samples)) == 0)) %>% 
   mutate(average_acclimation = mean(c_across(all_of(acclimation_samples)), na.rm = TRUE)) %>% 
   mutate(average_antibiotics = mean(c_across(all_of(antibiotics_samples)), na.rm = TRUE)) %>% 
   filter(all_zeros_acclimation == TRUE || all_zeros_antibiotics==TRUE)  %>% 
   mutate(present = case_when(
      all_zeros_acclimation & !all_zeros_antibiotics ~ "antibiotics",
      !all_zeros_acclimation & all_zeros_antibiotics ~ "acclimation",
      !all_zeros_acclimation & !all_zeros_antibiotics ~ "None",
      TRUE ~ NA_character_
    )) %>%
   mutate(average = ifelse(present == "acclimation", average_acclimation, average_antibiotics)) %>%
   dplyr::select(genome, present, average) %>%
   left_join(genome_metadata, by=join_by(genome==genome)) %>%
   arrange(present,-average)

Acclimation

structural_zeros %>% 
  filter(present=="acclimation") %>% 
  count(phylum, name = "sample_count") %>%
  arrange(desc(sample_count)) 
# A tibble: 9 × 2
# Rowwise: 
  phylum               sample_count
  <chr>                       <int>
1 p__Bacillota_A                 43
2 p__Bacteroidota                15
3 p__Bacillota                    8
4 p__Pseudomonadota               7
5 p__Cyanobacteriota              3
6 p__Verrucomicrobiota            2
7 p__Bacillota_B                  1
8 p__Bacillota_C                  1
9 p__Fusobacteriota               1

Antibiotics

structural_zeros %>% 
  filter(present=="antibiotics") %>% 
  select(1,5:10)
# A tibble: 4 × 7
# Rowwise: 
  genome                phylum            class                  order                 family                genus         species            
  <chr>                 <chr>             <chr>                  <chr>                 <chr>                 <chr>         <chr>              
1 AH1_2nd_7:bin_000003  p__Pseudomonadota c__Gammaproteobacteria o__Enterobacterales   f__Enterobacteriaceae g__Proteus    s__Proteus cibarius
2 LI1_2nd_7:bin_000001  p__Bacillota_A    c__Clostridia          o__Clostridiales      f__Clostridiaceae     g__Sarcina    s__                
3 AH1_2nd_7:bin_000055  p__Bacillota      c__Bacilli             o__Mycoplasmatales    f__Mycoplasmoidaceae  g__Ureaplasma s__                
4 AH1_2nd_13:bin_000025 p__Bacillota_A    c__Clostridia          o__Christensenellales f__UBA3700            g__           s__                

Community elements differences in structural zeros

element_gift <- GIFTs_elements_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  left_join(., sample_metadata[c(12,13)], by=join_by("sample"=="sample"))
uniqueGIFT_db<- unique(GIFT_db[c(2,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)

element_gift %>%
    pivot_longer(-c(sample,treatment), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ treatment)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)
# A tibble: 0 × 3
# ℹ 3 variables: trait <chr>, p_value <dbl>, p_adjust <dbl>

4.3.4 Differential abundance analysis: composition

phylo_samples <- sample_metadata %>%
  filter(Population == "Cold_wet" & time_point %in% c("Acclimation", "Antibiotics") )%>% 
  column_to_rownames("sample") %>% 
  sample_data()
phylo_genome <- genome_counts_filt %>% 
  filter(!genome %in% structural_zeros$genome) %>% 
  select(one_of(c("genome",rownames(phylo_samples)))) %>%
  filter(rowSums(across(where(is.numeric), ~ . != 0)) > 0) %>%
  select(genome, where(~ is.numeric(.) && sum(.) > 0)) %>%
  column_to_rownames("genome") %>% 
  otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata %>% 
  filter(genome %in% rownames(phylo_genome)) %>% 
  column_to_rownames("genome") %>% 
  dplyr::select(domain,phylum,class,order,family,genus,species) %>% 
  as.matrix() %>% 
  tax_table()

physeq_genome_filtered <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples)
ancom_rand_output = ancombc2(data = physeq_genome_filtered, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "time_point",
                  rand_formula = "(1|individual)",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = NULL, 
                  struc_zero = FALSE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  lme_control = lme4::lmerControl(),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)
save(ancom_rand_output, 
     file = "data/ancombc_antib_accl.Rdata")
load("data/ancombc_antib_accl.Rdata")
taxonomy <- data.frame(physeq_genome_filtered@tax_table) %>%
  rownames_to_column(., "taxon") %>%
  mutate_at(vars(order, phylum, family, genus, species), ~ str_replace(., "[dpcofgs]__", ""))

ancombc_rand_table_mag <- ancom_rand_output$res %>%
  dplyr::select(taxon, lfc_time_point2_Antibiotics, p_time_point2_Antibiotics) %>%
  filter(p_time_point2_Antibiotics < 0.05) %>%
  dplyr::arrange(p_time_point2_Antibiotics) %>%
  merge(., taxonomy, by="taxon") %>%
  mutate_at(vars(phylum, species), ~ str_replace(., "[dpcofgs]__", ""))%>%
  dplyr::arrange(lfc_time_point2_Antibiotics)

colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", ""))  %>%
  right_join(taxonomy, by=join_by(phylum == phylum)) %>%
  dplyr::select(phylum, colors) %>%
  mutate(colors = str_c(colors, "80"))  %>% #add 80% alpha
    unique() %>%
    dplyr::arrange(phylum)

tax_table <- as.data.frame(unique(ancombc_rand_table_mag$phylum))
  
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum")%>%
    dplyr::arrange(phylum) %>%
    dplyr::select(colors) %>%
    pull()
ancombc_rand_table_mag%>%
      mutate(genome=factor(taxon,levels=ancombc_rand_table_mag$taxon)) %>%
ggplot(., aes(x=lfc_time_point2_Antibiotics, y=forcats::fct_reorder(genome,lfc_time_point2_Antibiotics), fill=phylum)) + #forcats::fct_rev()
  geom_col() + 
  scale_fill_manual(values=tax_color) + 
  geom_hline(yintercept=0) + 
#  coord_flip()+
  theme(panel.background = element_blank(),
        axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 8),
        axis.title = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 14, face = "bold"),
        legend.position = "right", legend.box = "vertical")+
  xlab("log2FoldChange") + 
  ylab("Species")+
  guides(fill=guide_legend(title="Phylum"))

Phyla of the significant MAGs

ancombc_rand_table_mag%>%
  filter(lfc_time_point2_Antibiotics<0)  %>% 
  count(phylum, name = "sample_count") %>%
  arrange(desc(sample_count))  
            phylum sample_count
1     Bacteroidota           14
2      Bacillota_A            4
3        Bacillota            2
4 Campylobacterota            1
ancombc_rand_table_mag%>%
  filter(lfc_time_point2_Antibiotics<0)  %>% 
  select(phylum, genus, species)
             phylum           genus species
1  Campylobacterota  Helicobacter_J        
2       Bacillota_A                        
3      Bacteroidota     Bacteroides        
4      Bacteroidota     Odoribacter        
5      Bacteroidota     Bacteroides        
6         Bacillota   Coprobacillus        
7         Bacillota                        
8      Bacteroidota     Phocaeicola        
9      Bacteroidota     Bacteroides        
10     Bacteroidota     Phocaeicola        
11     Bacteroidota     Odoribacter        
12     Bacteroidota     Bacteroides        
13     Bacteroidota     Bacteroides        
14     Bacteroidota Parabacteroides        
15     Bacteroidota                        
16     Bacteroidota Parabacteroides        
17      Bacillota_A                        
18     Bacteroidota       Alistipes        
19      Bacillota_A                        
20      Bacillota_A                        
21     Bacteroidota     Odoribacter        
ancombc_rand_table_mag%>%
  filter(lfc_time_point2_Antibiotics<0)  %>% 
  count(genus, name = "sample_count") %>%
  arrange(desc(sample_count))
            genus sample_count
1                            6
2     Bacteroides            5
3     Odoribacter            3
4 Parabacteroides            2
5     Phocaeicola            2
6       Alistipes            1
7   Coprobacillus            1
8  Helicobacter_J            1

4.3.5 Differences in the functional capacity

GIFTs_elements <- to.elements(genome_gifts,GIFT_db)
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts_filt$genome,]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>% 
  select_if(~ !is.numeric(.) || sum(.) != 0)

GIFTs_functions <- to.functions(GIFTs_elements_filtered,GIFT_db)

GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db)

sample_sub <- sample_metadata %>%
  filter(Population == "Cold_wet" & treatment %in% c("Acclimation", "Antibiotics"))

genome_counts_row <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% 
    select(one_of(c("genome",sample_sub$sample))) %>%
  column_to_rownames(., "genome") 

GIFTs_elements_community <- to.community(GIFTs_elements_filtered,genome_counts_row,GIFT_db)
GIFTs_functions_community <- to.community(GIFTs_functions,genome_counts_row,GIFT_db)
GIFTs_domains_community <- to.community(GIFTs_domains,genome_counts_row,GIFT_db)

4.3.5.1 Community elements differences:

MCI

GIFTs_elements_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(treatment) %>%
  summarise(MCI = mean(value), sd = sd(value))
# A tibble: 2 × 3
  treatment     MCI     sd
  <chr>       <dbl>  <dbl>
1 Acclimation 0.335 0.0324
2 Antibiotics 0.247 0.136 
MCI <- GIFTs_elements_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) 

shapiro.test(MCI$value)

    Shapiro-Wilk normality test

data:  MCI$value
W = 0.94332, p-value = 0.09304
wilcox.test(value ~ treatment, data=MCI)

    Wilcoxon rank sum exact test

data:  value by treatment
W = 190, p-value = 0.01768
alternative hypothesis: true location shift is not equal to 0
element_gift <- GIFTs_elements_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  left_join(., sample_metadata[c(12,13)], by=join_by("sample"=="sample"))
uniqueGIFT_db<- unique(GIFT_db[c(2,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)

significant_elements <- element_gift %>%
    pivot_longer(-c(sample,treatment), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ treatment)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)%>%
  left_join(.,uniqueGIFT_db[c(1,3)],by = join_by(trait == Code_element))

element_gift_t <- element_gift  %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "trait")

element_gift_filt <- subset(element_gift_t, trait %in% significant_elements$trait) %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample")%>% 
  left_join(., sample_metadata[c(12,13)], by = join_by(sample == sample))

difference_table <- element_gift_filt %>%
  select(-sample) %>%
  group_by(treatment) %>%
  summarise(across(everything(), mean)) %>%
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric) %>%
  rownames_to_column(., "Elements") %>%
  left_join(.,uniqueGIFT_db[c(1,3,4)],by = join_by(Elements == Code_element)) %>% 
  arrange(Function) %>% 
  mutate(Difference=Acclimation-Antibiotics)%>% 
  mutate(group_color = ifelse(Difference <0, "Antibiotics","Acclimation")) 
difference_table %>%
  ggplot(aes(x=forcats::fct_reorder(Function,Difference), y=Difference, fill=group_color)) + 
  geom_col() +
#  geom_point(size=4) + 
  scale_fill_manual(values=treatment_colors) +
  geom_hline(yintercept=0) + 
  coord_flip()+
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 12),
        legend.position = "right", 
        panel.background = element_blank(),
          panel.grid.major = element_line(size = 0.15, linetype = 'solid',
                                colour = "grey"))+
  xlab("Microbial Functional Capacity") + 
  ylab("Mean difference")+
  labs(fill="Treatment")

4.3.5.2 Community functions differences

MCI

GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(treatment) %>%
  summarise(MCI = mean(value), sd = sd(value))
# A tibble: 2 × 3
  treatment     MCI     sd
  <chr>       <dbl>  <dbl>
1 Acclimation 0.330 0.0320
2 Antibiotics 0.256 0.126 
MCI <- GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) 

shapiro.test(MCI$value)

    Shapiro-Wilk normality test

data:  MCI$value
W = 0.95742, p-value = 0.2331
wilcox.test(value ~ treatment, data=MCI)

    Wilcoxon rank sum exact test

data:  value by treatment
W = 188, p-value = 0.02195
alternative hypothesis: true location shift is not equal to 0
function_gift <- GIFTs_functions_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  merge(., sample_metadata[c(12,13)], by="sample")
unique_funct_db<- GIFT_db[c(3,4,5)] %>% 
  distinct(Code_function, .keep_all = TRUE)

significant_functional <- function_gift %>%
  pivot_longer(-c(sample,treatment), names_to = "trait", values_to = "value") %>%
  group_by(trait) %>%
  summarise(p_value = wilcox.test(value ~ treatment)$p.value) %>%
  mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
  filter(p_adjust < 0.05)%>%
  left_join(.,unique_funct_db[c(1,3)],by = join_by(trait == Code_function))
Code_function Acclimation Antibiotics Function Difference treatment
B06 0.68110280 0.47325490 Organic anion biosynthesis 0.20784790 Acclimation
B02 0.59963040 0.41562100 Amino acid biosynthesis 0.18400940 Acclimation
D02 0.38587770 0.20636760 Polysaccharide degradation 0.17951010 Acclimation
S03 0.27103471 0.09357224 Spore 0.17746247 Acclimation
B01 0.84215140 0.69078910 Nucleic acid biosynthesis 0.15136230 Acclimation
B07 0.44558700 0.30432910 Vitamin biosynthesis 0.14125790 Acclimation
B08 0.44596150 0.32044710 Aromatic compound biosynthesis 0.12551440 Acclimation
D09 0.25533360 0.13438350 Antibiotic degradation 0.12095010 Acclimation
B04 0.54457570 0.42921780 SCFA biosynthesis 0.11535790 Acclimation
D03 0.29210750 0.20698550 Sugar degradation 0.08512200 Acclimation
D05 0.28856110 0.22258820 Amino acid degradation 0.06597290 Acclimation
B10 0.05947806 0.03621687 Antibiotic biosynthesis 0.02326119 Acclimation

4.4 Does antibiotic administration remove the differences found in the two populations?

4.4.1 Shapiro test

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == Tube_code)) %>%
  filter(time_point == "Antibiotics" ) %>%
  filter(metric=="richness") %>% 
  summarize(shapiro_p_value = shapiro.test(value)$p.value) %>%
  pull(shapiro_p_value) #0.001-->wilcox test
[1] 0.001165318
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == Tube_code)) %>%
  filter(time_point == "Antibiotics" ) %>%
  filter(metric=="neutral") %>% 
  summarize(shapiro_p_value = shapiro.test(value)$p.value) %>%
  pull(shapiro_p_value) #0.0003-->wilcox test
[1] 0.0003462674
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == Tube_code)) %>%
  filter(time_point == "Antibiotics" ) %>%
  filter(metric=="phylogenetic") %>% 
  summarize(shapiro_p_value = shapiro.test(value)$p.value) %>%
  pull(shapiro_p_value)
[1] 0.0659697

4.4.2 Alpha diversity

alpha_div_meta <- alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == sample))%>%
  filter(time_point == "Antibiotics" )
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = "sample") %>%
  mutate(metric=factor(metric,levels=c("richness","neutral"))) %>%
  filter(metric!="phylogenetic") %>%
  filter(time_point == "Antibiotics" ) %>% 
  ggplot(aes(y = value, x = Population, color=Population, fill=Population)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5, outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=treatment_colors)+
  scale_fill_manual(values=treatment_colors) +
      facet_wrap(. ~ metric, scales = "free") +
  stat_compare_means(method = "wilcox.test", show.legend = F, size = 3, label.x = c(1.5))+
      theme_classic() +
        theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size=10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Increase plot size
    plot.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8)
      ) +
  ylab("Alpha diversity")

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = "sample") %>%
  mutate(metric=factor(metric,levels=c("phylogenetic"))) %>%
  filter(time_point == "Antibiotics" ) %>% 
  ggplot(aes(y = value, x = Population, color=Population, fill=Population)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5, outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=treatment_colors)+
  scale_fill_manual(values=treatment_colors) +
  stat_compare_means(method = "t.test", show.legend = F, size = 3, label.x = c(1.5))+
      theme_classic() +
        theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size=10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Increase plot size
    plot.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8)
      ) +
  ylab("Alpha diversity")

4.4.3 Beta diversity

samples_to_keep <- sample_metadata %>%
  filter(time_point == "Antibiotics") %>% 
  select(Tube_code) %>% 
  pull()
subset_meta <- sample_metadata %>%
  filter(time_point == "Antibiotics")

Richness

richness <- as.matrix(beta_q0n$S)
richness <- as.dist(richness[rownames(richness) %in% samples_to_keep,
               colnames(richness) %in% samples_to_keep])

betadisper(richness, subset_meta$Population) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)  
Groups     1 0.015377 0.0153774 6.8934    999  0.017 *
Residuals 21 0.046845 0.0022307                       
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(richness ~ Population,
        data = subset_meta %>% arrange(match(Tube_code,labels(richness))),
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 1.361743 0.1533845 3.80465 0.001
Residual 21 7.516224 0.8466155 NA NA
Total 22 8.877966 1.0000000 NA NA

Neutral

neutral <- as.matrix(beta_q1n$S)
neutral <- as.dist(neutral[rownames(neutral) %in% samples_to_keep,
               colnames(neutral) %in% samples_to_keep])

betadisper(neutral, subset_meta$Population) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)  
Groups     1 0.030649 0.0306488 3.8694    999  0.077 .
Residuals 21 0.166339 0.0079209                       
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(neutral ~ Population,
        data = subset_meta %>% arrange(match(Tube_code,labels(neutral))),
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 1.787698 0.208772 5.541022 0.001
Residual 21 6.775221 0.791228 NA NA
Total 22 8.562919 1.000000 NA NA

Phylogenetic

phylo <- as.matrix(beta_q1p$S)
phylo <- as.dist(phylo[rownames(phylo) %in% samples_to_keep,
               colnames(phylo) %in% samples_to_keep])
betadisper(phylo, subset_meta$Population) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     1 0.012165 0.012165 0.9979    999  0.339
Residuals 21 0.256012 0.012191                     
adonis2(phylo ~ Population,
        data = subset_meta %>% arrange(match(Tube_code,labels(phylo))),
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.8970751 0.1890846 4.896659 0.001
Residual 21 3.8472307 0.8109154 NA NA
Total 22 4.7443057 1.0000000 NA NA

4.5 Are the microbial communities similar in both donor samples?

4.5.1 Alpha diversity

alpha_div_meta <- alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == sample))%>%
  filter(type =="Hot_control" & time_point %in% c( "Transplant1", "Transplant2"))
samples_to_keep <- sample_metadata %>%
  filter(type =="Hot_control" & time_point %in% c( "Transplant1", "Transplant2"))%>% 
  select(sample) %>% 
  pull()
subset_meta <- sample_metadata %>%
  filter(type =="Hot_control" & time_point %in% c( "Transplant1", "Transplant2"))
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = "sample") %>%
  mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic"))) %>%
  filter(type =="Hot_control" & time_point %in% c( "Transplant1", "Transplant2")) %>% 
  ggplot(aes(y = value, x = time_point, color=time_point, fill=time_point)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5, outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=treatment_colors)+
  scale_fill_manual(values=treatment_colors) +
  facet_wrap(. ~ metric, scales = "free") +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size=10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Increase plot size
    plot.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8)
      ) +
  ylab("Alpha diversity")

Richness

Modelq0GLMMNB <- glmer.nb(richness ~ time_point+(1|individual), data = alpha_div_meta)
summary(Modelq0GLMMNB)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(17.9668)  ( log )
Formula: richness ~ time_point + (1 | individual)
   Data: alpha_div_meta

     AIC      BIC   logLik deviance df.resid 
   150.4    153.3    -71.2    142.4       11 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.02956 -0.52933  0.03513  0.56378  1.56130 

Random effects:
 Groups     Name        Variance Std.Dev.
 individual (Intercept) 0.009989 0.09995 
Number of obs: 15, groups:  individual, 8

Fixed effects:
                      Estimate Std. Error z value Pr(>|z|)    
(Intercept)            4.60703    0.09905  46.514   <2e-16 ***
time_pointTransplant2  0.05482    0.13299   0.412     0.68    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
tm_pntTrns2 -0.624
emmeans::emmeans(Modelq0GLMMNB, pairwise ~ time_point)
$emmeans
 time_point  emmean    SE  df asymp.LCL asymp.UCL
 Transplant1   4.61 0.099 Inf      4.41      4.80
 Transplant2   4.66 0.105 Inf      4.46      4.87

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast                  estimate    SE  df z.ratio p.value
 Transplant1 - Transplant2  -0.0548 0.133 Inf  -0.412  0.6802

Results are given on the log (not the response) scale. 

Neutral

Model_neutral <- lme(fixed = neutral ~ time_point, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_neutral)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  115.9183 118.1781 -53.95914

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:    9.533528 10.15744

Fixed effects:  neutral ~ time_point 
                         Value Std.Error DF  t-value p-value
(Intercept)           43.94053  4.925212  7 8.921551  0.0000
time_pointTransplant2  4.31623  5.338413  6 0.808523  0.4497
 Correlation: 
                      (Intr)
time_pointTransplant2 -0.491

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-1.5771427 -0.5676402  0.0540640  0.5533248  1.1275055 

Number of Observations: 15
Number of Groups: 8 

Phylogenetic

Model_phylo <- lme(fixed = phylogenetic ~ time_point, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_phylo)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  41.34174 43.60154 -16.67087

Random effects:
 Formula: ~1 | individual
        (Intercept)  Residual
StdDev:    1.170147 0.3061822

Fixed effects:  phylogenetic ~ time_point 
                          Value Std.Error DF  t-value p-value
(Intercept)            5.813636 0.4276378  7 13.59477  0.0000
time_pointTransplant2 -0.018484 0.1633332  6 -0.11317  0.9136
 Correlation: 
                      (Intr)
time_pointTransplant2 -0.168

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-1.24253815 -0.37487798 -0.05019741  0.45701812  1.31723618 

Number of Observations: 15
Number of Groups: 8 

4.5.2 Beta diversity

Richness

richness <- as.matrix(beta_q0n$S)
richness <- as.dist(richness[rownames(richness) %in% samples_to_keep,
               colnames(richness) %in% samples_to_keep])

betadisper(richness, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.00014 0.0001396 0.0173    999  0.918
Residuals 13 0.10481 0.0080621                     
adonis2(richness ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(richness))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(richness))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.06889268 0.03010712 0.4035421 0.9375
Residual 13 2.21935895 0.96989288 NA NA
Total 14 2.28825162 1.00000000 NA NA

Neutral

neutral <- as.matrix(beta_q1n$S)
neutral <- as.dist(neutral[rownames(neutral) %in% samples_to_keep,
               colnames(neutral) %in% samples_to_keep])

betadisper(neutral, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.000655 0.0006546 0.0514    999  0.796
Residuals 13 0.165409 0.0127237                     
adonis2(neutral ~ treatment,
        data = subset_meta %>% arrange(match(Tube_code,labels(neutral))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(neutral))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.09294671 0.03882177 0.5250671 0.7265625
Residual 13 2.30124351 0.96117823 NA NA
Total 14 2.39419022 1.00000000 NA NA

Phylogenetic

phylo <- as.matrix(beta_q1p$S)
phylo <- as.dist(phylo[rownames(phylo) %in% samples_to_keep,
               colnames(phylo) %in% samples_to_keep])
betadisper(phylo, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.003691 0.0036912 0.3071    999   0.67
Residuals 13 0.156266 0.0120205                     
adonis2(phylo ~ treatment,
        data = subset_meta %>% arrange(match(Tube_code,labels(phylo))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(phylo))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.009773632 0.01921019 0.2546239 0.7734375
Residual 13 0.498999565 0.98078981 NA NA
Total 14 0.508773196 1.00000000 NA NA

4.6 Does the donor sample maintain the microbial community found in acclimation?

sample_metadata <- sample_metadata %>%
    mutate(treatment=case_when(
    treatment %in% c("Transplant1", "Transplant2") ~ "Transplant",
    TRUE ~ treatment
  ))

alpha_div_meta <- alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == sample))%>%
  filter(type == "Hot_control" & treatment %in% c("Acclimation","Transplant"))
sample_metadata <- sample_metadata %>%
    mutate(treatment=case_when(
    treatment %in% c("Transplant1", "Transplant2") ~ "Transplant",
    TRUE ~ treatment
  ))
samples_to_keep <- sample_metadata %>%
  filter(type == "Hot_control" & treatment %in% c("Acclimation","Transplant")) %>% 
  select(sample) %>% 
  pull()
subset_meta <- sample_metadata %>%
  filter(type == "Hot_control" & treatment %in% c("Acclimation","Transplant"))

4.6.1 Alpha diversity

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = "sample") %>%
  mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic"))) %>%
  filter(type == "Hot_control" & treatment %in% c("Acclimation","Transplant")) %>% 
  ggplot(aes(y = value, x = treatment, color=treatment, fill=treatment)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5, outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=treatment_colors)+
  scale_fill_manual(values=treatment_colors) +
  facet_wrap(. ~ metric, scales = "free") +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size=10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Increase plot size
    plot.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8)
      ) +
  ylab("Alpha diversity")

Richness

Modelq0GLMMNB <- glmer.nb(richness ~ treatment+(1|individual), data = alpha_div_meta)
summary(Modelq0GLMMNB)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(19.6072)  ( log )
Formula: richness ~ treatment + (1 | individual)
   Data: alpha_div_meta

     AIC      BIC   logLik deviance df.resid 
   228.0    232.7   -110.0    220.0       20 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.37872 -0.53180  0.06467  0.46904  1.76837 

Random effects:
 Groups     Name        Variance Std.Dev.
 individual (Intercept) 0        0       
Number of obs: 24, groups:  individual, 9

Fixed effects:
                    Estimate Std. Error z value Pr(>|z|)    
(Intercept)           4.4569     0.0834  53.441   <2e-16 ***
treatmentTransplant   0.1855     0.1049   1.769   0.0769 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
trtmntTrnsp -0.795
optimizer (Nelder_Mead) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
emmeans::emmeans(Modelq0GLMMNB, pairwise ~ treatment)
$emmeans
 treatment   emmean     SE  df asymp.LCL asymp.UCL
 Acclimation   4.46 0.0834 Inf      4.29      4.62
 Transplant    4.64 0.0636 Inf      4.52      4.77

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast                 estimate    SE  df z.ratio p.value
 Acclimation - Transplant   -0.186 0.105 Inf  -1.769  0.0769

Results are given on the log (not the response) scale. 

Neutral

Model_neutral <- lme(fixed = neutral ~ treatment, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_neutral)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  185.3367 189.7009 -88.66837

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev: 0.001909919 12.18199

Fixed effects:  neutral ~ treatment 
                       Value Std.Error DF   t-value p-value
(Intercept)         44.09058  4.060663 14 10.857976  0.0000
treatmentTransplant  1.80350  5.136377 14  0.351122  0.7307
 Correlation: 
                    (Intr)
treatmentTransplant -0.791

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-2.66610957 -0.48314823 -0.03902932  0.62479438  2.02597978 

Number of Observations: 24
Number of Groups: 9 

Phylogenetic

Model_phylo <- lme(fixed = phylogenetic ~ treatment, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_phylo)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  80.98805 85.35222 -36.49402

Random effects:
 Formula: ~1 | individual
        (Intercept)  Residual
StdDev:   0.7250943 0.9664077

Fixed effects:  phylogenetic ~ treatment 
                        Value Std.Error DF   t-value p-value
(Intercept)          6.496242 0.4027276 14 16.130609  0.0000
treatmentTransplant -0.581429 0.4143087 14 -1.403373  0.1823
 Correlation: 
                    (Intr)
treatmentTransplant -0.622

Standardized Within-Group Residuals:
         Min           Q1          Med           Q3          Max 
-2.329601943 -0.568031685 -0.001998197  0.552071283  1.528979022 

Number of Observations: 24
Number of Groups: 9 

4.6.2 Beta diversity

Richness

richness <- as.matrix(beta_q0n$S)
richness <- as.dist(richness[rownames(richness) %in% samples_to_keep,
               colnames(richness) %in% samples_to_keep])

betadisper(richness, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.00836 0.0083600 1.4409    999  0.269
Residuals 22 0.12764 0.0058019                     
adonis2(richness ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(richness))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(richness))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.2657351 0.06396199 1.503319 0.118
Residual 22 3.8888430 0.93603801 NA NA
Total 23 4.1545781 1.00000000 NA NA

Neutral

neutral <- as.matrix(beta_q1n$S)
neutral <- as.dist(neutral[rownames(neutral) %in% samples_to_keep,
               colnames(neutral) %in% samples_to_keep])

betadisper(neutral, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.000661 0.0006614 0.0736    999  0.808
Residuals 22 0.197804 0.0089911                     
adonis2(neutral ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(neutral))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(neutral))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.3164816 0.07596593 1.808646 0.083
Residual 22 3.8496178 0.92403407 NA NA
Total 23 4.1660995 1.00000000 NA NA
neutral %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(subset_meta, by = join_by(sample == sample)) %>%
  group_by(treatment) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = treatment)) +
  geom_point(size = 4) +
 # scale_color_manual(values = treatment_colors,labels=c("Cold_wet" = "Cold wet", "Hot_dry" = "Hot dry")) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9, show.legend = FALSE) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    )

Phylogenetic

phylo <- as.matrix(beta_q1p$S)
phylo <- as.dist(phylo[rownames(phylo) %in% samples_to_keep,
               colnames(phylo) %in% samples_to_keep])
betadisper(phylo, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.00204 0.0020405 0.2622    999  0.704
Residuals 22 0.17118 0.0077807                     
adonis2(phylo ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(phylo))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(phylo))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.0412056 0.056244 1.31111 0.227
Residual 22 0.6914166 0.943756 NA NA
Total 23 0.7326222 1.000000 NA NA
phylo %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(subset_meta, by = join_by(sample == sample)) %>%
  group_by(treatment) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = treatment)) +
  geom_point(size = 4) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9, show.legend = FALSE) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    )

4.7 Is the donor sample microbiota different to recipient microbiota?

samples_to_keep <- sample_metadata %>%
  filter(type == "Treatment" & treatment %in% c("Acclimation", "Transplant")) %>% 
  select(sample) %>% 
  pull()
subset_meta <- sample_metadata %>%
  filter(type == "Treatment" & treatment %in% c("Acclimation", "Transplant"))

4.7.1 Alpha diversity

sample_metadata <- sample_metadata %>%
    mutate(treatment=case_when(
    treatment %in% c("Transplant1", "Transplant2") ~ "Transplant",
    TRUE ~ treatment
  ))

alpha_div_meta <- alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == sample))%>%
  filter(type == "Treatment" & treatment %in% c("Acclimation","Transplant"))
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = "sample") %>%
  mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic"))) %>%
  filter(type == "Treatment" & treatment %in% c("Acclimation", "Transplant")) %>% 
  ggplot(aes(y = value, x = treatment, color=treatment, fill=treatment)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5, outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=treatment_colors)+
  scale_fill_manual(values=treatment_colors) +
  facet_wrap(. ~ metric, scales = "free") +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size=10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Increase plot size
    plot.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8)
      ) +
  ylab("Alpha diversity")

Richness

Modelq0GLMMNB <- glmer.nb(richness ~ time_point+(1|individual), data = alpha_div_meta)
summary(Modelq0GLMMNB)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(6.5468)  ( log )
Formula: richness ~ time_point + (1 | individual)
   Data: alpha_div_meta

     AIC      BIC   logLik deviance df.resid 
   221.1    226.6   -105.6    211.1       17 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.9657 -0.5696  0.1043  0.5578  1.2251 

Random effects:
 Groups     Name        Variance  Std.Dev. 
 individual (Intercept) 4.688e-12 2.165e-06
Number of obs: 22, groups:  individual, 9

Fixed effects:
                      Estimate Std. Error z value Pr(>|z|)    
(Intercept)             3.8067     0.1394  27.301  < 2e-16 ***
time_pointTransplant1   0.7956     0.2066   3.851 0.000118 ***
time_pointTransplant2   0.8893     0.2155   4.127 3.67e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) tm_pT1
tm_pntTrns1 -0.675       
tm_pntTrns2 -0.647  0.437
optimizer (Nelder_Mead) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
emmeans::emmeans(Modelq0GLMMNB, pairwise ~ time_point)
$emmeans
 time_point  emmean    SE  df asymp.LCL asymp.UCL
 Acclimation   3.81 0.139 Inf      3.53      4.08
 Transplant1   4.60 0.152 Inf      4.30      4.90
 Transplant2   4.70 0.164 Inf      4.37      5.02

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast                  estimate    SE  df z.ratio p.value
 Acclimation - Transplant1  -0.7956 0.207 Inf  -3.851  0.0003
 Acclimation - Transplant2  -0.8893 0.215 Inf  -4.127  0.0001
 Transplant1 - Transplant2  -0.0936 0.224 Inf  -0.418  0.9083

Results are given on the log (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 

Neutral

Model_neutral <- lme(fixed = neutral ~ time_point, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_neutral)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  164.8923 169.6144 -77.44613

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:    4.615625 11.39098

Fixed effects:  neutral ~ time_point 
                         Value Std.Error DF  t-value p-value
(Intercept)           17.31015  4.096860 11 4.225224  0.0014
time_pointTransplant1 25.57767  5.790892 11 4.416879  0.0010
time_pointTransplant2 32.41010  6.083229 11 5.327778  0.0002
 Correlation: 
                      (Intr) tm_pT1
time_pointTransplant1 -0.608       
time_pointTransplant2 -0.578  0.426

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-2.21937399 -0.53764773  0.08095672  0.58946091  1.48158276 

Number of Observations: 22
Number of Groups: 9 

Phylogenetic

Model_phylo <- lme(fixed = phylogenetic ~ time_point, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_phylo)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC     BIC    logLik
  82.23931 86.9615 -36.11965

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:   0.8338315 1.180605

Fixed effects:  phylogenetic ~ time_point 
                         Value Std.Error DF   t-value p-value
(Intercept)           5.515026 0.4817910 11 11.446928  0.0000
time_pointTransplant1 0.201736 0.6072186 11  0.332230  0.7460
time_pointTransplant2 0.226173 0.6404589 11  0.353142  0.7307
 Correlation: 
                      (Intr) tm_pT1
time_pointTransplant1 -0.529       
time_pointTransplant2 -0.502  0.436

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-2.34035811 -0.43258725 -0.08049281  0.52082148  1.58368872 

Number of Observations: 22
Number of Groups: 9 

4.7.2 Beta diversity

Richness

richness <- as.matrix(beta_q0n$S)
richness <- as.dist(richness[rownames(richness) %in% samples_to_keep,
               colnames(richness) %in% samples_to_keep])

betadisper(richness, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)   
Groups     1 0.12795 0.127946 13.179    999  0.003 **
Residuals 20 0.19416 0.009708                        
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(richness ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(richness))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(richness))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 1.777815 0.2760196 7.625059 0.001
Residual 20 4.663086 0.7239804 NA NA
Total 21 6.440902 1.0000000 NA NA

Neutral

neutral <- as.matrix(beta_q1n$S)
neutral <- as.dist(neutral[rownames(neutral) %in% samples_to_keep,
               colnames(neutral) %in% samples_to_keep])

betadisper(neutral, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq  Mean Sq      F N.Perm Pr(>F)  
Groups     1 0.071502 0.071502 5.4967    999  0.023 *
Residuals 20 0.260166 0.013008                       
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(neutral ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(neutral))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(neutral))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 2.112572 0.3210813 9.458609 0.002
Residual 20 4.466983 0.6789187 NA NA
Total 21 6.579556 1.0000000 NA NA

Phylogenetic

phylo <- as.matrix(beta_q1p$S)
phylo <- as.dist(phylo[rownames(phylo) %in% samples_to_keep,
               colnames(phylo) %in% samples_to_keep])
betadisper(phylo, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     1 0.04731 0.047307 2.6157    999  0.129
Residuals 20 0.36172 0.018086                     
adonis2(phylo ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(phylo))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(Tube_code,labels(phylo))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.3778248 0.239656 6.303884 0.002
Residual 20 1.1987049 0.760344 NA NA
Total 21 1.5765298 1.000000 NA NA

4.8 Does FMT change the microbial community over time?

alpha_div_meta <- alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == sample))%>%
  filter(type == "Treatment" & treatment %in% c("FMT1","FMT2") ) 

4.8.1 Alpha diversity

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = "sample") %>%
  mutate(metric=factor(metric,levels=c("richness","neutral", "phylogenetic"))) %>%
  filter(metric!="phylogenetic") %>%
  filter(type=="Treatment" & treatment %in% c("FMT1", "FMT2" )) %>% 
  ggplot(aes(y = value, x = treatment, color=treatment, fill=treatment)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5, outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=treatment_colors)+
  scale_fill_manual(values=treatment_colors) +
  facet_wrap(. ~ metric, scales = "free") +
      theme_classic() +
        theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size=10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8)
      ) +
  ylab("Alpha diversity")

Richness

Modelq0GLMMNB <- glmer.nb(richness ~ treatment+(1|individual), data = alpha_div_meta)
summary(Modelq0GLMMNB)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(4.3876)  ( log )
Formula: richness ~ treatment + (1 | individual)
   Data: alpha_div_meta

     AIC      BIC   logLik deviance df.resid 
   171.0    174.3    -81.5    163.0       13 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.73495 -0.71265 -0.07086  0.40744  1.88240 

Random effects:
 Groups     Name        Variance  Std.Dev. 
 individual (Intercept) 1.073e-11 3.275e-06
Number of obs: 17, groups:  individual, 9

Fixed effects:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept)     3.9343     0.1759  22.369   <2e-16 ***
treatmentFMT2   0.4052     0.2402   1.687   0.0917 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
tretmntFMT2 -0.732
optimizer (Nelder_Mead) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')
emmeans::emmeans(Modelq0GLMMNB, pairwise ~ treatment)
$emmeans
 treatment emmean    SE  df asymp.LCL asymp.UCL
 FMT1        3.93 0.176 Inf      3.59      4.28
 FMT2        4.34 0.164 Inf      4.02      4.66

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast    estimate   SE  df z.ratio p.value
 FMT1 - FMT2   -0.405 0.24 Inf  -1.687  0.0917

Results are given on the log (not the response) scale. 

Neutral

Model_neutral <- lme(fixed = neutral ~ treatment, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_neutral)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC   logLik
  128.7946 131.6268 -60.3973

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:    3.497472 11.25384

Fixed effects:  neutral ~ treatment 
                 Value Std.Error DF  t-value p-value
(Intercept)   24.65947  4.164756  8 5.920988  0.0004
treatmentFMT2 14.87494  5.482531  7 2.713151  0.0301
 Correlation: 
              (Intr)
treatmentFMT2 -0.7  

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-1.76462324 -0.55701822 -0.04763167  0.55267587  1.30333436 

Number of Observations: 17
Number of Groups: 9 

Phylogenetic

Model_phylo <- lme(fixed = phylogenetic ~ treatment, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_phylo)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  56.26492 59.09712 -24.13246

Random effects:
 Formula: ~1 | individual
        (Intercept)  Residual
StdDev:   0.7000124 0.8410939

Fixed effects:  phylogenetic ~ treatment 
                 Value Std.Error DF   t-value p-value
(Intercept)   4.171152 0.3832714  8 10.883024  0.0000
treatmentFMT2 0.919088 0.4135879  7  2.222231  0.0617
 Correlation: 
              (Intr)
treatmentFMT2 -0.583

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-1.1363964 -0.5779309 -0.1467688  0.4809911  2.3362210 

Number of Observations: 17
Number of Groups: 9 

4.8.2 Beta diversity

samples_to_keep <- sample_metadata %>%
  filter(type == "Treatment" & treatment %in% c("FMT1", "FMT2")) %>% 
  select(sample) %>% 
  pull()
subset_meta <- sample_metadata %>%
  filter(type == "Treatment" & treatment %in% c("FMT1", "FMT2"))

Richness

richness <- as.matrix(beta_q0n$S)
richness <- as.dist(richness[rownames(richness) %in% samples_to_keep,
               colnames(richness) %in% samples_to_keep])

betadisper(richness, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     1 0.017610 0.017610 2.8225    999  0.123
Residuals 15 0.093585 0.006239                     
adonis2(richness ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(richness))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(sample,labels(richness))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.3560084 0.07968734 1.298809 0.00390625
Residual 15 4.1115570 0.92031266 NA NA
Total 16 4.4675655 1.00000000 NA NA

Neutral

neutral <- as.matrix(beta_q1n$S)
neutral <- as.dist(neutral[rownames(neutral) %in% samples_to_keep,
               colnames(neutral) %in% samples_to_keep])

betadisper(neutral, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.009828 0.0098278 0.7113    999  0.404
Residuals 15 0.207263 0.0138175                     
adonis2(neutral ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(neutral))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(sample,labels(neutral))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.3149954 0.08110541 1.323962 0.00390625
Residual 15 3.5687823 0.91889459 NA NA
Total 16 3.8837776 1.00000000 NA NA

Phylogenetic

phylo <- as.matrix(beta_q1p$S)
phylo <- as.dist(phylo[rownames(phylo) %in% samples_to_keep,
               colnames(phylo) %in% samples_to_keep])
betadisper(phylo, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     1 0.010955 0.010955 0.6602    999  0.542
Residuals 15 0.248892 0.016593                     
adonis2(phylo ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(phylo))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(sample,labels(phylo))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.06391536 0.09449338 1.565312 0.0703125
Residual 15 0.61248501 0.90550662 NA NA
Total 16 0.67640037 1.00000000 NA NA

4.9 Do FMT change the microbial community compared to antibiotics and acclimation?

alpha_div_meta <- alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == sample))%>%
  filter(type == "Treatment" & treatment %in% c("Antibiotics", "Acclimation","FMT1") ) 

4.9.1 Alpha diversity

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = "sample") %>%
  mutate(metric=factor(metric,levels=c("richness","neutral", "phylogenetic"))) %>%
  filter(metric!="neutral")%>%
  filter(type=="Treatment" & treatment %in% c( "Antibiotics","Acclimation", "FMT1")) %>% 
  ggplot(aes(y = value, x = treatment, color=treatment, fill=treatment)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5, outlier.shape = NA, show.legend = FALSE) +
#  scale_color_manual(values=treatment_colors)+
#  scale_fill_manual(values=treatment_colors) +
  facet_wrap(. ~ metric, scales = "free") +
      theme_classic() +
        theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size=10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8)
      ) +
  ylab("Alpha diversity")

Richness: Problems to calculate

Modelq0GLMMNB <- glmer.nb(richness ~ treatment+(1|individual), data = alpha_div_meta)
summary(Modelq0GLMMNB)
emmeans::emmeans(Modelq0GLMMNB, pairwise ~ treatment)

Neutral

Model_neutral <- lme(fixed = neutral ~ treatment, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_neutral)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC     BIC   logLik
  173.0404 178.263 -81.5202

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:    4.263247 9.322271

Fixed effects:  neutral ~ treatment 
                         Value Std.Error DF   t-value p-value
(Intercept)          17.310151  3.416951 13  5.065963  0.0002
treatmentAntibiotics -8.868175  4.744329 13 -1.869216  0.0843
treatmentFMT1         7.289358  4.552796 13  1.601073  0.1334
 Correlation: 
                     (Intr) trtmnA
treatmentAntibiotics -0.596       
treatmentFMT1        -0.621  0.457

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-1.93185401 -0.56384573 -0.04015247  0.47657219  1.55593252 

Number of Observations: 24
Number of Groups: 9 
emmeans(Model_neutral, pairwise ~ treatment)
$emmeans
 treatment   emmean   SE df lower.CL upper.CL
 Acclimation  17.31 3.42  8    9.431     25.2
 Antibiotics   8.44 3.86  8   -0.451     17.3
 FMT1         24.60 3.62  8   16.256     32.9

Degrees-of-freedom method: containment 
Confidence level used: 0.95 

$contrasts
 contrast                  estimate   SE df t.ratio p.value
 Acclimation - Antibiotics     8.87 4.74 13   1.869  0.1868
 Acclimation - FMT1           -7.29 4.55 13  -1.601  0.2800
 Antibiotics - FMT1          -16.16 4.85 13  -3.333  0.0139

Degrees-of-freedom method: containment 
P value adjustment: tukey method for comparing a family of 3 estimates 

Phylogenetic

Model_phylo <- lme(fixed = phylogenetic ~ treatment, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_phylo)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  93.06845 98.29106 -41.53422

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:   0.5550293 1.414341

Fixed effects:  phylogenetic ~ treatment 
                         Value Std.Error DF   t-value p-value
(Intercept)           5.515026 0.5064492 13 10.889593  0.0000
treatmentAntibiotics -1.206905 0.7182905 13 -1.680247  0.1168
treatmentFMT1        -1.361586 0.6899383 13 -1.973490  0.0701
 Correlation: 
                     (Intr) trtmnA
treatmentAntibiotics -0.611       
treatmentFMT1        -0.636  0.456

Standardized Within-Group Residuals:
         Min           Q1          Med           Q3          Max 
-2.001111616 -0.444289520  0.007864473  0.386082526  1.973255643 

Number of Observations: 24
Number of Groups: 9 
emmeans(Model_phylo, pairwise ~ treatment)
$emmeans
 treatment   emmean    SE df lower.CL upper.CL
 Acclimation   5.52 0.506  8     4.35     6.68
 Antibiotics   4.31 0.573  8     2.99     5.63
 FMT1          4.15 0.537  8     2.92     5.39

Degrees-of-freedom method: containment 
Confidence level used: 0.95 

$contrasts
 contrast                  estimate    SE df t.ratio p.value
 Acclimation - Antibiotics    1.207 0.718 13   1.680  0.2494
 Acclimation - FMT1           1.362 0.690 13   1.973  0.1581
 Antibiotics - FMT1           0.155 0.735 13   0.210  0.9759

Degrees-of-freedom method: containment 
P value adjustment: tukey method for comparing a family of 3 estimates 

4.9.2 Beta diversity

samples_to_keep <- sample_metadata %>%
  filter(type == "Treatment" & treatment %in% c("Acclimation", "Antibiotics", "Post-FMT1")) %>% 
  select(sample) %>% 
  pull()
subset_meta <- sample_metadata %>%
  filter(type == "Treatment" & treatment %in% c("Acclimation", "Antibiotics", "Post-FMT1"))

Richness

richness <- as.matrix(beta_q0n$S)
richness <- as.dist(richness[rownames(richness) %in% samples_to_keep,
               colnames(richness) %in% samples_to_keep])

betadisper(richness, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.008328 0.0083277 1.2753    999   0.29
Residuals 14 0.091419 0.0065300                     
adonis2(richness ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(richness))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(sample,labels(richness))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.8287871 0.1407734 2.293722 0.0078125
Residual 14 5.0585993 0.8592266 NA NA
Total 15 5.8873864 1.0000000 NA NA
pairwise.adonis(richness, subset_meta$treatment, perm = 999)
                       pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
1 Acclimation vs Antibiotics  1 0.8287871 2.293722 0.1407734   0.005      0.005   *

Neutral

neutral <- as.matrix(beta_q1n$S)
neutral <- as.dist(neutral[rownames(neutral) %in% samples_to_keep,
               colnames(neutral) %in% samples_to_keep])

betadisper(neutral, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.005446 0.0054462 0.3949    999  0.534
Residuals 14 0.193060 0.0137900                     
adonis2(neutral ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(neutral))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(sample,labels(neutral))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.8814932 0.1640316 2.747044 0.0078125
Residual 14 4.4924309 0.8359684 NA NA
Total 15 5.3739241 1.0000000 NA NA
pairwise.adonis(neutral, subset_meta$treatment, perm = 999)
                       pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
1 Acclimation vs Antibiotics  1 0.8814932 2.747044 0.1640316   0.004      0.004   *

Phylogenetic

phylo <- as.matrix(beta_q1p$S)
phylo <- as.dist(phylo[rownames(phylo) %in% samples_to_keep,
               colnames(phylo) %in% samples_to_keep])
betadisper(phylo, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     1 0.07055 0.070551 2.4964    999  0.141
Residuals 14 0.39565 0.028260                     
adonis2(phylo ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(phylo))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(sample,labels(phylo))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.6885926 0.2679674 5.124832 0.015625
Residual 14 1.8810952 0.7320326 NA NA
Total 15 2.5696878 1.0000000 NA NA
pairwise.adonis(phylo, subset_meta$treatment, perm = 999)
                       pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
1 Acclimation vs Antibiotics  1 0.6885926 5.124832 0.2679674   0.004      0.004   *

4.10 Is the gut microbiota similar to the donor after FMT?

sample_metadata <- sample_metadata %>%
    mutate(treatment=case_when(
    treatment %in% c("FMT1", "FMT2") ~ "FMT",
    TRUE ~ treatment
  ))

samples_to_keep <- sample_metadata %>%
  filter(type == "Treatment" & treatment %in% c("FMT", "Transplant")) %>% 
  select(sample) %>% 
  pull()

subset_meta <- sample_metadata %>%
  filter(type == "Treatment" & treatment %in% c("FMT", "Transplant"))

4.10.1 Alpha diversity

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = "sample") %>%
  mutate(metric=factor(metric,levels=c("neutral","richness","phylogenetic"))) %>%
  filter(type=="Treatment" & treatment %in% c( "Transplant", "FMT")) %>% 
  ggplot(aes(y = value, x = treatment, color=treatment, fill=treatment)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5, outlier.shape = NA, show.legend = FALSE) +
#  scale_color_manual(values=treatment_colors)+
#  scale_fill_manual(values=treatment_colors) +
  facet_wrap(. ~ metric, scales = "free") +
      theme_classic() +
        theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size=10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 10),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 8)
      ) +
  ylab("Alpha diversity")

Richness

Modelq0GLMMNB <- glmer.nb(richness ~ treatment+(1|individual), data = alpha_div_meta)
summary(Modelq0GLMMNB)
emmeans::emmeans(Modelq0GLMMNB, pairwise ~ treatment)

Neutral

Model_neutral <- lme(fixed = neutral ~ treatment, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_neutral)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC     BIC   logLik
  173.0404 178.263 -81.5202

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:    4.263247 9.322271

Fixed effects:  neutral ~ treatment 
                         Value Std.Error DF   t-value p-value
(Intercept)          17.310151  3.416951 13  5.065963  0.0002
treatmentAntibiotics -8.868175  4.744329 13 -1.869216  0.0843
treatmentFMT1         7.289358  4.552796 13  1.601073  0.1334
 Correlation: 
                     (Intr) trtmnA
treatmentAntibiotics -0.596       
treatmentFMT1        -0.621  0.457

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-1.93185401 -0.56384573 -0.04015247  0.47657219  1.55593252 

Number of Observations: 24
Number of Groups: 9 

Phylogenetic

Model_phylo <- lme(fixed = phylogenetic ~ treatment, data = alpha_div_meta,
               random = ~ 1 | individual)
summary(Model_phylo)
Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  93.06845 98.29106 -41.53422

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev:   0.5550293 1.414341

Fixed effects:  phylogenetic ~ treatment 
                         Value Std.Error DF   t-value p-value
(Intercept)           5.515026 0.5064492 13 10.889593  0.0000
treatmentAntibiotics -1.206905 0.7182905 13 -1.680247  0.1168
treatmentFMT1        -1.361586 0.6899383 13 -1.973490  0.0701
 Correlation: 
                     (Intr) trtmnA
treatmentAntibiotics -0.611       
treatmentFMT1        -0.636  0.456

Standardized Within-Group Residuals:
         Min           Q1          Med           Q3          Max 
-2.001111616 -0.444289520  0.007864473  0.386082526  1.973255643 

Number of Observations: 24
Number of Groups: 9 

4.10.2 Beta diversity

Richness

richness <- as.matrix(beta_q0n$S)
richness <- as.dist(richness[rownames(richness) %in% samples_to_keep,
               colnames(richness) %in% samples_to_keep])

betadisper(richness, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)    
Groups     1 0.11100 0.111002 11.752    999  0.001 ***
Residuals 28 0.26447 0.009445                         
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(richness ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(richness))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(sample,labels(richness))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 1.184578 0.1548451 5.13002 0.001
Residual 28 6.465508 0.8451549 NA NA
Total 29 7.650086 1.0000000 NA NA

Neutral

neutral <- as.matrix(beta_q1n$S)
neutral <- as.dist(neutral[rownames(neutral) %in% samples_to_keep,
               colnames(neutral) %in% samples_to_keep])

betadisper(neutral, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)  
Groups     1 0.04408 0.044082 3.1744    999  0.088 .
Residuals 28 0.38883 0.013887                       
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(neutral ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(neutral))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(sample,labels(neutral))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 1.147077 0.1608783 5.368223 0.001
Residual 28 5.983012 0.8391217 NA NA
Total 29 7.130089 1.0000000 NA NA
neutral %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(subset_meta, by = join_by(sample == sample)) %>%
  group_by(treatment) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = treatment)) +
  geom_point(size = 4) +
 # scale_color_manual(values = treatment_colors) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9, show.legend = FALSE) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    )

Phylogenetic

phylo <- as.matrix(beta_q1p$S)
phylo <- as.dist(phylo[rownames(phylo) %in% samples_to_keep,
               colnames(phylo) %in% samples_to_keep])
betadisper(phylo, subset_meta$treatment) %>% permutest(.) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.00007 0.0000689 0.0044    999  0.954
Residuals 28 0.43637 0.0155847                     
adonis2(phylo ~ treatment,
        data = subset_meta %>% arrange(match(sample,labels(phylo))),
        permutations = 999,
        strata = subset_meta %>% arrange(match(sample,labels(phylo))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
term df SumOfSqs R2 statistic p.value
Model 1 0.1298187 0.1018776 3.17615 0.003
Residual 28 1.1444432 0.8981224 NA NA
Total 29 1.2742619 1.0000000 NA NA

4.10.3 Structural zeros

FMT_samples <- sample_metadata %>%
  filter(type == "Treatment" & treatment == "FMT") %>% 
  dplyr::select(sample) %>%
  pull()

Transplant_samples <- sample_metadata %>%
  filter(type == "Treatment" & treatment =="Transplant") %>% 
  dplyr::select(sample) %>% pull()

structural_zeros <- genome_counts_filt %>% 
  select(one_of(c("genome",subset_meta$sample))) %>%
  filter(rowSums(across(where(is.numeric), ~ . != 0)) > 0) %>%
  select(genome, where(~ is.numeric(.) && sum(.) > 0)) %>% 
   rowwise() %>% #compute for each row (genome)
   mutate(all_zeros_FMT = all(c_across(all_of(FMT_samples)) == 0)) %>% 
   mutate(all_zeros_Transplant = all(c_across(all_of(Transplant_samples)) == 0)) %>% 
   mutate(average_FMT = mean(c_across(all_of(FMT_samples)), na.rm = TRUE)) %>% 
   mutate(average_Transplant = mean(c_across(all_of(Transplant_samples)), na.rm = TRUE)) %>% 
   filter(all_zeros_FMT == TRUE || all_zeros_Transplant==TRUE)  %>% 
   mutate(present = case_when(
      all_zeros_FMT & !all_zeros_Transplant ~ "Transplant",
      !all_zeros_FMT & all_zeros_Transplant ~ "FMT",
      !all_zeros_FMT & !all_zeros_Transplant ~ "None",
      TRUE ~ NA_character_
    )) %>%
   mutate(average = ifelse(present == "FMT", average_FMT, average_Transplant)) %>%
   dplyr::select(genome, present, average) %>%
   left_join(genome_metadata, by=join_by(genome==genome)) %>%
   arrange(present,-average)

Structural zeros in each group

fmt <- structural_zeros %>% 
  filter(present=="FMT") %>% 
  count(phylum, name = "FMT") %>%
  arrange(desc(FMT)) 

fmt_f <- structural_zeros %>% 
  filter(present=="FMT") %>% 
  count(family, name = "FMT") %>%
  arrange(desc(FMT)) 
structural_zeros %>% 
  filter(present=="Transplant") %>% 
  count(phylum, name = "Transplant") %>%
  arrange(desc(Transplant)) %>% 
  full_join(.,fmt, by="phylum" ) %>%
  mutate(across(everything(), ~ replace_na(., 0))) %>%
  as.data.frame() %>% 
  summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE)))
  Transplant FMT
1         52  55

Phyla to which the structural zeros belong in each group

structural_zeros %>% 
  filter(present=="Transplant") %>% 
  count(phylum, name = "Transplant") %>%
  arrange(desc(Transplant)) %>% 
  full_join(.,fmt, by="phylum" ) %>%
  mutate(across(everything(), ~ replace_na(., 0))) %>% 
  tt()
phylum Transplant FMT
p__Bacillota_A 20 21
p__Bacillota 16 10
p__Pseudomonadota 6 11
p__Bacteroidota 3 6
p__Desulfobacterota 2 2
p__Bacillota_B 1 0
p__Chlamydiota 1 0
p__Cyanobacteriota 1 0
p__Fusobacteriota 1 0
p__Verrucomicrobiota 1 2
p__Actinomycetota 0 1
p__Bacillota_C 0 1
p__Campylobacterota 0 1

Families to which the structural zeros belong in each group

structural_zeros %>% 
  filter(present=="Transplant") %>% 
  count(family, name = "Transplant") %>%
  arrange(desc(Transplant)) %>% 
  full_join(.,fmt_f, by="family" ) %>%
  mutate(across(everything(), ~ replace_na(., 0))) %>% 
  tt()
family Transplant FMT
f__Lachnospiraceae 7 9
f__Erysipelotrichaceae 6 5
f__UBA660 6 0
f__Enterobacteriaceae 5 2
f__CAG-508 3 0
f__Ruminococcaceae 3 3
f__Anaerovoracaceae 2 0
f__Coprobacillaceae 2 0
f__Desulfovibrionaceae 2 2
f__Oscillospiraceae 2 1
f__Tannerellaceae 2 1
f__UBA1242 2 0
f__ 1 3
f__Akkermansiaceae 1 0
f__CAG-239 1 2
f__Chlamydiaceae 1 0
f__Enterococcaceae 1 3
f__Fusobacteriaceae 1 0
f__Gastranaerophilaceae 1 0
f__Marinifilaceae 1 0
f__Mycoplasmoidaceae 1 1
f__Peptococcaceae 1 0
f__Anaerotignaceae 0 2
f__Bacteroidaceae 0 2
f__LL51 0 2
f__UBA3700 0 2
f__Acutalibacteraceae 0 1
f__Arcobacteraceae 0 1
f__CAG-274 0 1
f__CALVMC01 0 1
f__Devosiaceae 0 1
f__Mycobacteriaceae 0 1
f__Pumilibacteraceae 0 1
f__RUG11792 0 1
f__Rhizobiaceae 0 1
f__Rikenellaceae 0 1
f__Sphingobacteriaceae 0 1
f__Streptococcaceae 0 1
f__UBA1997 0 1
f__UBA3830 0 1
f__Weeksellaceae 0 1

4.10.3.1 Functional capacities of the structural zeros

#Aggregate bundle-level GIFTs into the compound level
GIFTs_elements <- to.elements(genome_gifts,GIFT_db)
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% structural_zeros$genome,]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>% 
  select_if(~ !is.numeric(.) || sum(.) != 0)

#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered,GIFT_db)

#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db)

sample_sub <- sample_metadata %>%
    mutate(treatment=case_when(
    treatment %in% c("Post-FMT1", "Post-FMT2") ~ "FMT",
    TRUE ~ treatment
  ))%>%
  filter(type == "Treatment" & treatment %in% c("FMT", "Transplant"))

genome_counts_row <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% 
  filter(genome %in% structural_zeros$genome) %>% 
  select(one_of(c("genome",sample_sub$sample))) %>%
  filter(rowSums(across(where(is.numeric), ~ . != 0)) > 0) %>%
  select(genome, where(~ is.numeric(.) && sum(.) > 0)) %>% 
  column_to_rownames(., "genome") 
GIFTs_elements_community <- to.community(GIFTs_elements_filtered,genome_counts_row,GIFT_db)
GIFTs_functions_community <- to.community(GIFTs_functions,genome_counts_row,GIFT_db)
GIFTs_domains_community <- to.community(GIFTs_domains,genome_counts_row,GIFT_db)
element_gift <- GIFTs_elements_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  left_join(., sample_metadata[c(12,13)], by=join_by("sample"=="sample"))
uniqueGIFT_db<- unique(GIFT_db[c(2,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)

significant_elements <- element_gift %>%
    pivot_longer(-c(sample,treatment), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ treatment)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)%>%
  rownames_to_column(., "Elements")  %>%
  left_join(.,uniqueGIFT_db[c(1,3)],by = join_by(trait == Code_element))

element_gift_t <- element_gift  %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "trait")

element_gift_filt <- subset(element_gift_t, trait %in% significant_elements$trait) %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample")%>% 
  left_join(., sample_metadata[c(12,13)], by = join_by(sample == sample))

difference_table <- element_gift_filt %>%
  select(-sample) %>%
  group_by(treatment) %>%
  summarise(across(everything(), mean)) %>%
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric) %>%
  rownames_to_column(., "Elements") %>%
  left_join(.,uniqueGIFT_db[c(1,3,4)],by = join_by(Elements == Code_element)) %>% 
  arrange(Function) %>% 
  mutate(Difference=FMT-Transplant)%>% 
  mutate(group_color = ifelse(Difference <0, "Transplant","FMT")) 
difference_table %>%
  ggplot(aes(x=forcats::fct_reorder(Function,Difference), y=Difference, fill=group_color)) + 
  geom_col() +
#  geom_point(size=4) + 
  scale_fill_manual(values=treatment_colors) +
  geom_hline(yintercept=0) + 
  coord_flip()+
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 12),
        legend.position = "right", 
        panel.background = element_blank(),
          panel.grid.major = element_line(size = 0.15, linetype = 'solid',
                                colour = "grey"))+
  xlab("Microbial Functional Capacity") + 
  ylab("Mean difference")+
  labs(fill="Elevation")

4.10.4 Differential abundance analysis: composition

phylo_samples <- sample_metadata %>%
    mutate(treatment=case_when(
    treatment %in% c("FMT1", "FMT2") ~ "FMT",
    TRUE ~ treatment
  ))%>%
  filter(type == "Treatment" & treatment %in% c("FMT", "Transplant")) %>% 
  column_to_rownames("sample") %>%
  sample_data()
phylo_genome <- genome_counts_filt %>% 
  filter(!genome %in% structural_zeros$genome) %>% 
  select(one_of(c("genome",rownames(phylo_samples)))) %>%
  filter(rowSums(across(where(is.numeric), ~ . != 0)) > 0) %>%
  select(genome, where(~ is.numeric(.) && sum(.) > 0)) %>%
  column_to_rownames("genome") %>% 
  otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata %>% 
  filter(genome %in% rownames(phylo_genome)) %>% 
  column_to_rownames("genome") %>% 
  dplyr::select(domain,phylum,class,order,family,genus,species) %>% 
  as.matrix() %>% 
  tax_table()

physeq_genome_filtered <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples)
ancom_rand_output = ancombc2(data = physeq_genome_filtered, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "treatment",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = NULL, 
                  struc_zero = FALSE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  lme_control = lme4::lmerControl(),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)
save(ancom_rand_output, file = "data/ancombc_FMT_transplant.Rdata")
load("data/ancombc_FMT_transplant.Rdata")
taxonomy <- data.frame(physeq_genome_filtered@tax_table) %>%
  rownames_to_column(., "taxon") %>%
  mutate_at(vars(order, phylum, family, genus, species), ~ str_replace(., "[dpcofgs]__", ""))

ancombc_rand_table_mag <- ancom_rand_output$res %>%
  dplyr::select(taxon, lfc_treatmentTransplant, p_treatmentTransplant) %>%
  filter(p_treatmentTransplant < 0.05) %>%
  dplyr::arrange(p_treatmentTransplant) %>%
  merge(., taxonomy, by="taxon") %>%
  mutate_at(vars(phylum, species), ~ str_replace(., "[dpcofgs]__", ""))%>%
  dplyr::arrange(lfc_treatmentTransplant)

colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", ""))  %>%
  right_join(taxonomy, by=join_by(phylum == phylum)) %>%
  dplyr::select(phylum, colors) %>%
  mutate(colors = str_c(colors, "80"))  %>% #add 80% alpha
    unique() %>%
    dplyr::arrange(phylum)

tax_table <- as.data.frame(unique(ancombc_rand_table_mag$phylum))
  
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum")%>%
    dplyr::arrange(phylum) %>%
    dplyr::select(colors) %>%
    pull()

Differential abundance MAGs in each group

fmt_count <- ancombc_rand_table_mag %>%
  filter(lfc_treatmentTransplant<0)  %>% 
  count(phylum, name = "FMT") %>%
  arrange(desc(FMT)) 

ancombc_rand_table_mag %>%
  filter(lfc_treatmentTransplant>0)  %>% 
  count(phylum, name = "Transplant") %>%
  arrange(desc(Transplant))  %>% 
  full_join(.,fmt_count, by="phylum")%>%
  mutate(across(where(is.numeric), ~ replace_na(., 0)))
             phylum Transplant FMT
1      Bacteroidota         13   5
2       Bacillota_A          4  17
3         Bacillota          3   1
4  Campylobacterota          1   1
5  Desulfobacterota          1   2
6     Spirochaetota          1   0
7    Pseudomonadota          0   5
8   Cyanobacteriota          0   2
9       Bacillota_B          0   1
10      Bacillota_C          0   1
ancombc_rand_table_mag %>%
  filter(lfc_treatmentTransplant>0)  %>% 
  count(phylum, name = "Transplant") %>%
  arrange(desc(Transplant))  %>% 
  full_join(.,fmt_count, by="phylum")%>%
  mutate(across(where(is.numeric), ~ replace_na(., 0))) %>% 
  as.data.frame() %>% 
  summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE)))
  Transplant FMT
1         23  35
ancombc_rand_table_mag%>%
      mutate(genome=factor(taxon,levels=ancombc_rand_table_mag$taxon)) %>%
ggplot(., aes(x=lfc_treatmentTransplant, y=forcats::fct_reorder(genome,lfc_treatmentTransplant), fill=phylum)) + #forcats::fct_rev()
  geom_col() + 
  scale_fill_manual(values=tax_color) + 
  geom_hline(yintercept=0) + 
#  coord_flip()+
  theme(panel.background = element_blank(),
        axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 8),
        axis.title = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 14, face = "bold"),
        legend.position = "right", legend.box = "vertical")+
  xlab("log2FoldChange") + 
  ylab("Species")+
  guides(fill=guide_legend(title="Phylum"))

4.10.5 Differences in the functional capacities

GIFTs_elements <- to.elements(genome_gifts,GIFT_db)
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts_filt$genome,]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>% 
  select_if(~ !is.numeric(.) || sum(.) != 0)
GIFTs_functions <- to.functions(GIFTs_elements_filtered,GIFT_db)
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db)

sample_sub <- sample_metadata %>%
    mutate(treatment=case_when(
    treatment %in% c("Post-FMT1", "Post-FMT2") ~ "FMT",
    TRUE ~ treatment
  ))%>%
  filter(type == "Treatment" & treatment %in% c("FMT", "Transplant"))

genome_counts_row <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% 
    select(one_of(c("genome",sample_sub$sample))) %>%
  column_to_rownames(., "genome") 

GIFTs_elements_community <- to.community(GIFTs_elements_filtered,genome_counts_row,GIFT_db)
GIFTs_functions_community <- to.community(GIFTs_functions,genome_counts_row,GIFT_db)
GIFTs_domains_community <- to.community(GIFTs_domains,genome_counts_row,GIFT_db)

4.10.5.1 Community elements differences:

MCI

GIFTs_elements_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(treatment) %>%
  summarise(MCI = mean(value), sd = sd(value))
# A tibble: 2 × 3
  treatment    MCI     sd
  <chr>      <dbl>  <dbl>
1 FMT        0.359 0.0259
2 Transplant 0.356 0.0432
MCI <- GIFTs_elements_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) 

shapiro.test(MCI$value)

    Shapiro-Wilk normality test

data:  MCI$value
W = 0.94871, p-value = 0.1561
wilcox.test(value ~ treatment, data=MCI)

    Wilcoxon rank sum exact test

data:  value by treatment
W = 128, p-value = 0.4826
alternative hypothesis: true location shift is not equal to 0
element_gift <- GIFTs_elements_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  left_join(., sample_metadata[c(12,13)], by=join_by("sample"=="sample"))
uniqueGIFT_db<- unique(GIFT_db[c(2,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)

significant_elements <- element_gift %>%
    pivot_longer(-c(sample,treatment), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ treatment)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)%>%
  rownames_to_column(., "Elements")  %>%
  left_join(.,uniqueGIFT_db[c(1,3)],by = join_by(trait == Code_element))

element_gift_t <- element_gift  %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "trait")

element_gift_filt <- subset(element_gift_t, trait %in% significant_elements$trait) %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample")%>% 
  left_join(., sample_metadata[c(12,13)], by = join_by(sample == sample))

difference_table <- element_gift_filt %>%
  select(-sample) %>%
  group_by(treatment) %>%
  summarise(across(everything(), mean)) %>%
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric) %>%
  rownames_to_column(., "Elements") %>%
  left_join(.,uniqueGIFT_db[c(1,3,4)],by = join_by(Elements == Code_element)) %>% 
  arrange(Function) %>% 
  mutate(Difference=FMT-Transplant)%>% 
  mutate(group_color = ifelse(Difference <0, "Transplant","FMT")) 
means_gift <- element_gift_filt %>% 
  select(-treatment) %>% 
  pivot_longer(!sample, names_to = "Elements", values_to = "abundance") %>% 
  left_join(sample_metadata, by=join_by(sample==sample)) %>% 
  group_by(treatment, Elements) %>%
  summarise(mean=mean(abundance))

log_fold <- means_gift %>%
  group_by(Elements) %>%
  summarise(
    logfc_fmt_transplant = log2(mean[treatment == "FMT"] / mean[treatment == "Transplant"])
    ) %>% 
  left_join(., difference_table, by="Elements")
difference_table %>%
  ggplot(aes(x=forcats::fct_reorder(Function,Difference), y=Difference, fill=group_color)) + 
  geom_col() +
#  geom_point(size=4) + 
  scale_fill_manual(values=treatment_colors) +
  geom_hline(yintercept=0) + 
  coord_flip()+
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 12),
        legend.position = "right", 
        panel.background = element_blank(),
          panel.grid.major = element_line(size = 0.15, linetype = 'solid',
                                colour = "grey"))+
  xlab("Microbial Functional Capacity") + 
  ylab("Mean difference")+
  labs(fill="Treatment")

log_fold %>%
  filter(Elements!="D0611") %>% 
  ggplot(aes(x=forcats::fct_reorder(Function,logfc_fmt_transplant), y=logfc_fmt_transplant, fill=group_color)) + 
  geom_col() +
  scale_fill_manual(values=treatment_colors) +
  geom_hline(yintercept=0) + 
  coord_flip()+
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 12),
        legend.position = "right", 
        panel.background = element_blank(),
          panel.grid.major = element_line(size = 0.15, linetype = 'solid',
                                colour = "grey"))+
  xlab("Microbial Functional Capacity") + 
  ylab("Log_fold")+
  labs(fill="Treatment")

#### Community functions differences

MCI

GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(treatment) %>%
  summarise(MCI = mean(value), sd = sd(value))
# A tibble: 2 × 3
  treatment    MCI     sd
  <chr>      <dbl>  <dbl>
1 FMT        0.349 0.0221
2 Transplant 0.355 0.0377
MCI <- GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) 

shapiro.test(MCI$value)

    Shapiro-Wilk normality test

data:  MCI$value
W = 0.87044, p-value = 0.001713
wilcox.test(value ~ treatment, data=MCI)

    Wilcoxon rank sum exact test

data:  value by treatment
W = 121, p-value = 0.6801
alternative hypothesis: true location shift is not equal to 0
function_gift <- GIFTs_functions_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  merge(., sample_metadata[c(12,13)], by="sample")
unique_funct_db<- GIFT_db[c(3,4,5)] %>% 
  distinct(Code_function, .keep_all = TRUE)

significant_functional <- function_gift %>%
    pivot_longer(-c(sample,treatment), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ treatment)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)%>%
  left_join(.,unique_funct_db[c(1,3)],by = join_by(trait == Code_function))

significant_functional
# A tibble: 3 × 4
  trait  p_value p_adjust Function               
  <chr>    <dbl>    <dbl> <chr>                  
1 B04   0.000220  0.00441 SCFA biosynthesis      
2 B10   0.00284   0.0189  Antibiotic biosynthesis
3 S02   0.00174   0.0174  Appendages             

  1. University of Copenhagen, ↩︎

  2. Sociedad de Ciencias Aranzadi-Departamento de Herpetología, ↩︎

  3. University of Copenhagen ↩︎

  4. University of Copenhagen, ↩︎